scout-browser 4.98.0__py3-none-any.whl → 4.100.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/adapter/mongo/case.py +30 -15
- scout/adapter/mongo/clinvar.py +23 -31
- scout/adapter/mongo/event.py +14 -4
- scout/adapter/mongo/institute.py +42 -55
- scout/adapter/mongo/omics_variant.py +14 -1
- scout/adapter/mongo/query.py +24 -1
- scout/adapter/mongo/variant.py +44 -22
- scout/adapter/mongo/variant_loader.py +169 -186
- scout/build/individual.py +5 -1
- scout/build/variant/variant.py +8 -0
- scout/commands/download/ensembl.py +18 -3
- scout/commands/load/research.py +2 -3
- scout/commands/update/individual.py +3 -0
- scout/commands/update/panelapp.py +15 -2
- scout/constants/__init__.py +6 -2
- scout/constants/clnsig.py +2 -0
- scout/constants/file_types.py +12 -0
- scout/constants/igv_tracks.py +9 -6
- scout/constants/indexes.py +5 -4
- scout/constants/panels.py +3 -0
- scout/constants/query_terms.py +1 -0
- scout/constants/variant_tags.py +6 -6
- scout/demo/643594.config.yaml +1 -0
- scout/load/panelapp.py +11 -5
- scout/models/case/case.py +1 -0
- scout/models/case/case_loading_models.py +7 -1
- scout/parse/ensembl.py +8 -3
- scout/parse/variant/clnsig.py +38 -0
- scout/parse/variant/genotype.py +4 -10
- scout/parse/variant/models.py +5 -11
- scout/parse/variant/rank_score.py +5 -13
- scout/parse/variant/variant.py +90 -111
- scout/server/app.py +39 -22
- scout/server/blueprints/alignviewers/controllers.py +29 -10
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +51 -11
- scout/server/blueprints/cases/controllers.py +9 -3
- scout/server/blueprints/cases/templates/cases/case_report.html +25 -13
- scout/server/blueprints/cases/templates/cases/chanjo2_form.html +1 -1
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
- scout/server/blueprints/cases/templates/cases/gene_panel.html +1 -1
- scout/server/blueprints/cases/templates/cases/utils.html +25 -6
- scout/server/blueprints/clinvar/controllers.py +34 -15
- scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +34 -12
- scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +14 -5
- scout/server/blueprints/clinvar/views.py +14 -2
- scout/server/blueprints/diagnoses/static/diagnoses.js +8 -1
- scout/server/blueprints/institutes/controllers.py +10 -2
- scout/server/blueprints/institutes/static/variants_list_scripts.js +9 -1
- scout/server/blueprints/institutes/templates/overview/institute_sidebar.html +9 -1
- scout/server/blueprints/login/controllers.py +112 -12
- scout/server/blueprints/login/views.py +38 -60
- scout/server/blueprints/mme/__init__.py +1 -0
- scout/server/blueprints/mme/controllers.py +18 -0
- scout/server/blueprints/mme/templates/mme/mme_submissions.html +153 -0
- scout/server/blueprints/mme/views.py +34 -0
- scout/server/blueprints/panels/templates/panels/panel.html +19 -6
- scout/server/blueprints/phenotypes/templates/phenotypes/hpo_terms.html +8 -1
- scout/server/blueprints/public/templates/public/index.html +5 -1
- scout/server/blueprints/variant/controllers.py +19 -10
- scout/server/blueprints/variant/templates/variant/acmg.html +15 -2
- scout/server/blueprints/variant/templates/variant/cancer-variant.html +1 -1
- scout/server/blueprints/variant/templates/variant/components.html +38 -16
- scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
- scout/server/blueprints/variant/templates/variant/utils.html +23 -11
- scout/server/blueprints/variant/templates/variant/variant.html +42 -1
- scout/server/blueprints/variant/views.py +12 -0
- scout/server/blueprints/variants/controllers.py +20 -3
- scout/server/blueprints/variants/forms.py +8 -3
- scout/server/blueprints/variants/templates/variants/components.html +34 -0
- scout/server/blueprints/variants/templates/variants/indicators.html +11 -13
- scout/server/blueprints/variants/templates/variants/mei-variants.html +8 -6
- scout/server/blueprints/variants/templates/variants/sv-variants.html +9 -7
- scout/server/blueprints/variants/templates/variants/utils.html +35 -34
- scout/server/blueprints/variants/templates/variants/variants.html +4 -25
- scout/server/config.py +8 -0
- scout/server/extensions/bionano_extension.py +0 -1
- scout/server/extensions/chanjo2_extension.py +54 -13
- scout/server/links.py +15 -0
- scout/server/static/bs_styles.css +34 -6
- scout/server/templates/utils.html +9 -10
- scout/server/utils.py +40 -5
- scout/utils/acmg.py +25 -26
- scout/utils/ensembl_biomart_clients.py +2 -1
- scout/utils/ensembl_rest_clients.py +25 -32
- scout/utils/hgvs.py +1 -1
- scout/utils/scout_requests.py +1 -3
- {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/METADATA +10 -14
- {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/RECORD +91 -87
- {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/WHEEL +0 -0
- {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,12 +1,15 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
# stdlib modules
|
3
3
|
import logging
|
4
|
+
import sys
|
4
5
|
from datetime import datetime
|
6
|
+
from typing import Dict, Iterable, Optional
|
5
7
|
|
6
8
|
import cyvcf2
|
7
9
|
|
8
10
|
# Third party modules
|
9
11
|
import pymongo
|
12
|
+
from click import progressbar
|
10
13
|
from cyvcf2 import VCF, Variant
|
11
14
|
from intervaltree import IntervalTree
|
12
15
|
from pymongo.errors import BulkWriteError, DuplicateKeyError
|
@@ -349,54 +352,35 @@ class VariantLoader(object):
|
|
349
352
|
|
350
353
|
def _load_variants(
|
351
354
|
self,
|
352
|
-
variants,
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
This is the function that loops over the variants, parse them and build the variant
|
355
|
+
variants: Iterable[cyvcf2.Variant],
|
356
|
+
nr_variants: int,
|
357
|
+
variant_type: str,
|
358
|
+
case_obj: dict,
|
359
|
+
individual_positions: dict,
|
360
|
+
rank_threshold: int,
|
361
|
+
institute_id: str,
|
362
|
+
build: Optional[str] = None,
|
363
|
+
rank_results_header: Optional[list] = None,
|
364
|
+
vep_header: Optional[list] = None,
|
365
|
+
category: str = "snv",
|
366
|
+
sample_info: Optional[dict] = None,
|
367
|
+
custom_images: Optional[dict] = None,
|
368
|
+
local_archive_info: Optional[dict] = None,
|
369
|
+
gene_to_panels: Optional[Dict[str, set]] = None,
|
370
|
+
hgncid_to_gene: Optional[Dict[int, dict]] = None,
|
371
|
+
genomic_intervals: Optional[Dict[str, IntervalTree]] = None,
|
372
|
+
) -> int:
|
373
|
+
"""This is the function that loops over the variants, parses them and builds the variant
|
372
374
|
objects so they are ready to be inserted into the database.
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
variant_type(str): ['clinical', 'research']
|
377
|
-
case_obj(dict)
|
378
|
-
individual_positions(dict): How individuals are positioned in vcf
|
379
|
-
rank_treshold(int): Only load variants with a rank score > than this
|
380
|
-
institute_id(str)
|
381
|
-
build(str): Genome build
|
382
|
-
rank_results_header(list): Rank score categories
|
383
|
-
vep_header(list)
|
384
|
-
category(str): ['snv','sv','cancer','str']
|
385
|
-
sample_info(dict): A dictionary with info about samples.
|
386
|
-
Strictly for cancer to tell which is tumor
|
387
|
-
custom_images(dict): A dict with custom images for a case.
|
388
|
-
local_archive_info(dict): A dict with info about the local archive used for annotation
|
389
|
-
|
390
|
-
Returns:
|
391
|
-
nr_inserted(int)
|
375
|
+
All variants with rank score above rank_threshold are loaded. All MT, pathogenic, managed or variants causative in other cases are also loaded.
|
376
|
+
individual_positions refers to the order of samples in the VCF file. sample_info contains info about samples. It is used for instance to define tumor samples in cancer cases.
|
377
|
+
local_archive_info contains info about the local archive used for annotation.
|
392
378
|
"""
|
393
379
|
build = build or "37"
|
394
380
|
|
395
|
-
LOG.info("Start inserting {0} {1} variants into database".format(variant_type, category))
|
396
381
|
start_insertion = datetime.now()
|
397
382
|
start_five_thousand = datetime.now()
|
398
|
-
|
399
|
-
nr_variants = 0
|
383
|
+
|
400
384
|
# These are the number of variants that meet the criteria and gets inserted
|
401
385
|
nr_inserted = 0
|
402
386
|
# This is to keep track of blocks of inserted variants
|
@@ -408,123 +392,131 @@ class VariantLoader(object):
|
|
408
392
|
bulk = {}
|
409
393
|
current_region = None
|
410
394
|
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
variant=parsed_variant,
|
448
|
-
institute_id=institute_id,
|
449
|
-
gene_to_panels=gene_to_panels,
|
450
|
-
hgncid_to_gene=hgncid_to_gene,
|
451
|
-
sample_info=sample_info,
|
452
|
-
)
|
453
|
-
|
454
|
-
# Check if the variant is in a genomic region
|
455
|
-
var_chrom = variant_obj["chromosome"]
|
456
|
-
var_start = variant_obj["position"]
|
457
|
-
# We need to make sure that the interval has a length > 0
|
458
|
-
var_end = variant_obj["end"] + 1
|
459
|
-
var_id = variant_obj["_id"]
|
460
|
-
# If the bulk should be loaded or not
|
461
|
-
load = True
|
462
|
-
new_region = None
|
463
|
-
|
464
|
-
intervals = genomic_intervals.get(var_chrom, IntervalTree())
|
465
|
-
genomic_regions = intervals.overlap(var_start, var_end)
|
466
|
-
|
467
|
-
# If the variant is in a coding region
|
468
|
-
if genomic_regions:
|
469
|
-
# We know there is data here so get the interval id
|
470
|
-
new_region = genomic_regions.pop().data
|
471
|
-
# If the variant is in the same region as previous
|
472
|
-
# we add it to the same bulk
|
473
|
-
if new_region == current_region:
|
474
|
-
load = False
|
475
|
-
|
476
|
-
# This is the case where the variant is intergenic
|
477
|
-
else:
|
478
|
-
# If the previous variant was also intergenic we add the variant to the bulk
|
479
|
-
if not current_region:
|
480
|
-
load = False
|
481
|
-
# We need to have a max size of the bulk
|
482
|
-
if len(bulk) > 10000:
|
483
|
-
load = True
|
484
|
-
# Associate variant with image
|
485
|
-
if custom_images:
|
486
|
-
images = [
|
487
|
-
img for img in custom_images if img["str_repid"] == variant_obj["str_repid"]
|
488
|
-
]
|
489
|
-
if len(images) > 0:
|
490
|
-
variant_obj["custom_images"] = images
|
491
|
-
# Load the variant object
|
492
|
-
if load:
|
493
|
-
# If the variant bulk contains coding variants we want to update the compounds
|
494
|
-
if current_region:
|
495
|
-
self.update_compounds(bulk)
|
496
|
-
try:
|
497
|
-
# Load the variants
|
498
|
-
self.load_variant_bulk(list(bulk.values()))
|
499
|
-
nr_bulks += 1
|
500
|
-
except IntegrityError as error:
|
501
|
-
pass
|
502
|
-
bulk = {}
|
503
|
-
|
504
|
-
current_region = new_region
|
505
|
-
if var_id in bulk:
|
506
|
-
LOG.warning(
|
507
|
-
"Duplicated variant %s detected in same bulk. Attempting separate upsert.",
|
508
|
-
variant_obj.get("simple_id"),
|
395
|
+
LOG.info(f"Number of variants present on the VCF file:{nr_variants}")
|
396
|
+
with progressbar(
|
397
|
+
variants, label="Loading variants", length=nr_variants, file=sys.stdout
|
398
|
+
) as bar:
|
399
|
+
for idx, variant in enumerate(bar):
|
400
|
+
# All MT variants are loaded
|
401
|
+
mt_variant = variant.CHROM in ["M", "MT"]
|
402
|
+
rank_score = parse_rank_score(variant.INFO.get("RankScore"), case_obj["_id"])
|
403
|
+
pathogenic = is_pathogenic(variant)
|
404
|
+
managed = self._is_managed(variant, category)
|
405
|
+
causative = self._is_causative_other_cases(variant, category)
|
406
|
+
|
407
|
+
# Check if the variant should be loaded at all
|
408
|
+
# if rank score is None means there are no rank scores annotated, all variants will be loaded
|
409
|
+
# Otherwise we load all variants above a rank score treshold
|
410
|
+
# Except for MT variants where we load all variants
|
411
|
+
if (
|
412
|
+
(rank_score is None)
|
413
|
+
or (rank_score > rank_threshold)
|
414
|
+
or mt_variant
|
415
|
+
or pathogenic
|
416
|
+
or causative
|
417
|
+
or managed
|
418
|
+
or category in ["str"]
|
419
|
+
):
|
420
|
+
nr_inserted += 1
|
421
|
+
# Parse the vcf variant
|
422
|
+
parsed_variant = parse_variant(
|
423
|
+
variant=variant,
|
424
|
+
case=case_obj,
|
425
|
+
variant_type=variant_type,
|
426
|
+
rank_results_header=rank_results_header,
|
427
|
+
vep_header=vep_header,
|
428
|
+
individual_positions=individual_positions,
|
429
|
+
category=category,
|
430
|
+
local_archive_info=local_archive_info,
|
509
431
|
)
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
LOG.info("%s variants parsed", str(nr_variants))
|
519
|
-
LOG.info(
|
520
|
-
"Time to parse variants: %s",
|
521
|
-
(datetime.now() - start_five_thousand),
|
432
|
+
|
433
|
+
# Build the variant object
|
434
|
+
variant_obj = build_variant(
|
435
|
+
variant=parsed_variant,
|
436
|
+
institute_id=institute_id,
|
437
|
+
gene_to_panels=gene_to_panels,
|
438
|
+
hgncid_to_gene=hgncid_to_gene,
|
439
|
+
sample_info=sample_info,
|
522
440
|
)
|
523
|
-
start_five_thousand = datetime.now()
|
524
441
|
|
525
|
-
|
526
|
-
|
527
|
-
|
442
|
+
# Check if the variant is in a genomic region
|
443
|
+
var_chrom = variant_obj["chromosome"]
|
444
|
+
var_start = variant_obj["position"]
|
445
|
+
# We need to make sure that the interval has a length > 0
|
446
|
+
var_end = variant_obj["end"] + 1
|
447
|
+
var_id = variant_obj["_id"]
|
448
|
+
# If the bulk should be loaded or not
|
449
|
+
load = True
|
450
|
+
new_region = None
|
451
|
+
|
452
|
+
intervals = genomic_intervals.get(var_chrom, IntervalTree())
|
453
|
+
genomic_regions = intervals.overlap(var_start, var_end)
|
454
|
+
|
455
|
+
# If the variant is in a coding region
|
456
|
+
if genomic_regions:
|
457
|
+
# We know there is data here so get the interval id
|
458
|
+
new_region = genomic_regions.pop().data
|
459
|
+
# If the variant is in the same region as previous
|
460
|
+
# we add it to the same bulk
|
461
|
+
if new_region == current_region:
|
462
|
+
load = False
|
463
|
+
|
464
|
+
# This is the case where the variant is intergenic
|
465
|
+
else:
|
466
|
+
# If the previous variant was also intergenic we add the variant to the bulk
|
467
|
+
if not current_region:
|
468
|
+
load = False
|
469
|
+
# We need to have a max size of the bulk
|
470
|
+
if len(bulk) > 10000:
|
471
|
+
load = True
|
472
|
+
# Associate variant with image
|
473
|
+
if custom_images:
|
474
|
+
images = [
|
475
|
+
img
|
476
|
+
for img in custom_images
|
477
|
+
if img["str_repid"] == variant_obj["str_repid"]
|
478
|
+
]
|
479
|
+
if len(images) > 0:
|
480
|
+
variant_obj["custom_images"] = images
|
481
|
+
|
482
|
+
# Load the variant object
|
483
|
+
if load:
|
484
|
+
# If the variant bulk contains coding variants we want to update the compounds
|
485
|
+
if current_region:
|
486
|
+
self.update_compounds(bulk)
|
487
|
+
try:
|
488
|
+
# Load the variants
|
489
|
+
self.load_variant_bulk(list(bulk.values()))
|
490
|
+
nr_bulks += 1
|
491
|
+
except IntegrityError as error:
|
492
|
+
pass
|
493
|
+
bulk = {}
|
494
|
+
|
495
|
+
current_region = new_region
|
496
|
+
if var_id in bulk:
|
497
|
+
LOG.warning(
|
498
|
+
"Duplicated variant %s detected in same bulk. Attempting separate upsert.",
|
499
|
+
variant_obj.get("simple_id"),
|
500
|
+
)
|
501
|
+
try:
|
502
|
+
self.upsert_variant(variant_obj)
|
503
|
+
except IntegrityError as err:
|
504
|
+
pass
|
505
|
+
else:
|
506
|
+
bulk[var_id] = variant_obj
|
507
|
+
|
508
|
+
if nr_variants != 0 and nr_variants % 5000 == 0:
|
509
|
+
LOG.info("%s variants parsed", str(nr_variants))
|
510
|
+
LOG.info(
|
511
|
+
"Time to parse variants: %s",
|
512
|
+
(datetime.now() - start_five_thousand),
|
513
|
+
)
|
514
|
+
start_five_thousand = datetime.now()
|
515
|
+
|
516
|
+
if nr_inserted != 0 and (nr_inserted * inserted) % (1000 * inserted) == 0:
|
517
|
+
LOG.info("%s variants inserted", nr_inserted)
|
518
|
+
inserted += 1
|
519
|
+
|
528
520
|
# If the variants are in a coding region we update the compounds
|
529
521
|
if current_region:
|
530
522
|
self.update_compounds(bulk)
|
@@ -538,8 +530,6 @@ class VariantLoader(object):
|
|
538
530
|
)
|
539
531
|
)
|
540
532
|
|
541
|
-
if nr_variants:
|
542
|
-
nr_variants += 1
|
543
533
|
LOG.info("Nr variants parsed: %s", nr_variants)
|
544
534
|
LOG.info("Nr variants inserted: %s", nr_inserted)
|
545
535
|
LOG.debug("Nr bulks inserted: %s", nr_bulks)
|
@@ -621,16 +611,16 @@ class VariantLoader(object):
|
|
621
611
|
|
622
612
|
def load_variants(
|
623
613
|
self,
|
624
|
-
case_obj,
|
625
|
-
variant_type="clinical",
|
626
|
-
category="snv",
|
627
|
-
rank_threshold=None,
|
628
|
-
chrom=None,
|
629
|
-
start=None,
|
630
|
-
end=None,
|
631
|
-
gene_obj=None,
|
632
|
-
custom_images=None,
|
633
|
-
build="37",
|
614
|
+
case_obj: dict,
|
615
|
+
variant_type: str = "clinical",
|
616
|
+
category: str = "snv",
|
617
|
+
rank_threshold: float = None,
|
618
|
+
chrom: str = None,
|
619
|
+
start: int = None,
|
620
|
+
end: int = None,
|
621
|
+
gene_obj: dict = None,
|
622
|
+
custom_images: list = None,
|
623
|
+
build: str = "37",
|
634
624
|
):
|
635
625
|
"""Load variants for a case into scout.
|
636
626
|
|
@@ -657,30 +647,21 @@ class VariantLoader(object):
|
|
657
647
|
|
658
648
|
nr_inserted = 0
|
659
649
|
|
660
|
-
|
650
|
+
gene_to_panels = self.gene_to_panels(case_obj)
|
651
|
+
genes = list(self.all_genes(build=build))
|
652
|
+
hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
|
653
|
+
genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
|
654
|
+
|
661
655
|
for vcf_file_key, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
|
662
656
|
if vcf_dict["variant_type"] != variant_type:
|
663
657
|
continue
|
664
658
|
if vcf_dict["category"] != category:
|
665
659
|
continue
|
666
660
|
|
667
|
-
LOG.
|
661
|
+
LOG.info(f"Loading'{vcf_file_key}' variants")
|
668
662
|
variant_file = case_obj["vcf_files"].get(vcf_file_key)
|
669
|
-
if variant_file:
|
670
|
-
variant_files.append(variant_file)
|
671
663
|
|
672
|
-
|
673
|
-
raise SyntaxError(
|
674
|
-
"VCF files for {} {} does not seem to exist".format(category, variant_type)
|
675
|
-
)
|
676
|
-
|
677
|
-
gene_to_panels = self.gene_to_panels(case_obj)
|
678
|
-
genes = [gene_obj for gene_obj in self.all_genes(build=build)]
|
679
|
-
hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
|
680
|
-
genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
|
681
|
-
|
682
|
-
for variant_file in variant_files:
|
683
|
-
if not self._has_variants_in_file(variant_file):
|
664
|
+
if not variant_file or not self._has_variants_in_file(variant_file):
|
684
665
|
continue
|
685
666
|
|
686
667
|
vcf_obj = VCF(variant_file)
|
@@ -722,11 +703,13 @@ class VariantLoader(object):
|
|
722
703
|
else:
|
723
704
|
rank_threshold = rank_threshold or 0
|
724
705
|
|
725
|
-
|
706
|
+
nr_variants = sum(1 for _ in vcf_obj(region))
|
707
|
+
vcf_obj = VCF(variant_file)
|
726
708
|
|
727
709
|
try:
|
728
710
|
nr_inserted = self._load_variants(
|
729
|
-
variants=
|
711
|
+
variants=vcf_obj(region),
|
712
|
+
nr_variants=nr_variants,
|
730
713
|
variant_type=variant_type,
|
731
714
|
case_obj=case_obj,
|
732
715
|
individual_positions=individual_positions,
|
scout/build/individual.py
CHANGED
@@ -6,9 +6,12 @@ from scout.exceptions import PedigreeError
|
|
6
6
|
|
7
7
|
log = logging.getLogger(__name__)
|
8
8
|
BUILD_INDIVIDUAL_FILES = [
|
9
|
+
"assembly_alignment_path",
|
9
10
|
"bam_file",
|
10
11
|
"d4_file",
|
12
|
+
"minor_allele_frequency_wig",
|
11
13
|
"mt_bam",
|
14
|
+
"paraphase_alignment_path",
|
12
15
|
"rhocall_bed",
|
13
16
|
"rhocall_wig",
|
14
17
|
"rna_alignment_path",
|
@@ -39,9 +42,10 @@ def build_individual(ind: dict) -> dict:
|
|
39
42
|
mother = str, # Individual id of mother
|
40
43
|
capture_kits = list, # List of names of capture kits
|
41
44
|
bam_file = str, # Path to bam file,
|
45
|
+
minor_allele_frequency_wig = str, # Path to a HiFiCNV MAF wig
|
42
46
|
rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
|
43
47
|
rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
|
44
|
-
tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
|
48
|
+
tiddit_coverage_wig = str, # Path to a TIDDIT or HiFiCNV coverage wig - overview coverage
|
45
49
|
upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
|
46
50
|
upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
|
47
51
|
vcf2cytosure = str, # Path to CGH file
|
scout/build/variant/variant.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
import logging
|
3
|
+
from typing import List
|
3
4
|
|
4
5
|
from scout.utils.convert import call_safe
|
5
6
|
from scout.utils.dict_utils import remove_nonetype
|
@@ -115,6 +116,7 @@ def build_variant(
|
|
115
116
|
revel_score = float, REVEL rankscore
|
116
117
|
revel = float, REVEL score
|
117
118
|
clnsig = list, # list of <clinsig>
|
119
|
+
|
118
120
|
spidex = float,
|
119
121
|
|
120
122
|
missing_data = bool, # default False
|
@@ -247,6 +249,7 @@ def build_variant(
|
|
247
249
|
add_hgnc_symbols(variant_obj, variant_obj["hgnc_ids"], hgncid_to_gene)
|
248
250
|
link_gene_panels(variant_obj, gene_to_panels)
|
249
251
|
add_clnsig_objects(variant_obj, variant.get("clnsig", []))
|
252
|
+
add_clnsig_onc_objects(variant_obj, variant.get("clnsig_onc"))
|
250
253
|
|
251
254
|
add_callers(variant_obj, variant.get("callers", {}))
|
252
255
|
|
@@ -337,6 +340,11 @@ def add_clnsig_objects(variant_obj, clnsig_list):
|
|
337
340
|
variant_obj["clnsig"] = clnsig_objects
|
338
341
|
|
339
342
|
|
343
|
+
def add_clnsig_onc_objects(variant_obj: dict, onc_clnsig: List[dict]):
|
344
|
+
if onc_clnsig:
|
345
|
+
variant_obj["clnsig_onc"] = onc_clnsig
|
346
|
+
|
347
|
+
|
340
348
|
def add_callers(variant_obj, call_info):
|
341
349
|
"""Add call_info to variant_obj
|
342
350
|
Args: variant_obj (Dict)
|
@@ -6,11 +6,21 @@ from typing import List, Optional
|
|
6
6
|
|
7
7
|
import click
|
8
8
|
|
9
|
-
from scout.utils.ensembl_biomart_clients import EnsemblBiomartHandler
|
9
|
+
from scout.utils.ensembl_biomart_clients import CHROM_SEPARATOR, EnsemblBiomartHandler
|
10
|
+
|
11
|
+
NR_EXPECTED_CHROMS = 24
|
10
12
|
|
11
13
|
LOG = logging.getLogger(__name__)
|
12
14
|
|
13
15
|
|
16
|
+
def integrity_check(nr_chromosomes_in_file: int):
|
17
|
+
if nr_chromosomes_in_file < NR_EXPECTED_CHROMS:
|
18
|
+
raise BufferError(
|
19
|
+
"Ensembl resource does not seem to be complete. Please retry downloading genes/transcripts."
|
20
|
+
)
|
21
|
+
LOG.info("Integrity check OK.")
|
22
|
+
|
23
|
+
|
14
24
|
def print_ensembl(
|
15
25
|
out_dir: pathlib.Path, resource_type: List[str], genome_build: Optional[str] = None
|
16
26
|
):
|
@@ -31,14 +41,19 @@ def print_ensembl(
|
|
31
41
|
|
32
42
|
file_name: str = f"ensembl_{resource_type}_{build}.txt"
|
33
43
|
file_path = out_dir / file_name
|
44
|
+
nr_chroms_in_file = 0
|
34
45
|
|
35
46
|
LOG.info("Print ensembl info %s to %s", build, file_path)
|
36
47
|
|
37
48
|
with file_path.open("w", encoding="utf-8") as outfile:
|
38
49
|
for line in ensembl_client.stream_resource(interval_type=resource_type):
|
39
|
-
|
50
|
+
if line.strip() == CHROM_SEPARATOR:
|
51
|
+
nr_chroms_in_file += 1
|
52
|
+
else:
|
53
|
+
outfile.write(line + "\n")
|
40
54
|
|
41
|
-
LOG.info(f"{file_name} file saved to disk")
|
55
|
+
LOG.info(f"{file_name} file saved to disk.")
|
56
|
+
integrity_check(nr_chroms_in_file)
|
42
57
|
|
43
58
|
|
44
59
|
@click.command("ensembl", help="Download files with ensembl info")
|
scout/commands/load/research.py
CHANGED
@@ -10,6 +10,7 @@ from scout.adapter import MongoAdapter
|
|
10
10
|
from scout.constants import ORDERED_FILE_TYPE_MAP
|
11
11
|
from scout.server.extensions import store
|
12
12
|
|
13
|
+
DEFAULT_RANK_THRESHOLD = 8
|
13
14
|
LOG = logging.getLogger(__name__)
|
14
15
|
|
15
16
|
|
@@ -23,7 +24,6 @@ def upload_research_variants(
|
|
23
24
|
"""Delete existing variants and upload new variants"""
|
24
25
|
adapter.delete_variants(case_id=case_obj["_id"], variant_type=variant_type, category=category)
|
25
26
|
|
26
|
-
LOG.info("Load %s %s for: %s", variant_type, category.upper(), case_obj["_id"])
|
27
27
|
adapter.load_variants(
|
28
28
|
case_obj=case_obj,
|
29
29
|
variant_type=variant_type,
|
@@ -85,7 +85,6 @@ def research(case_id, institute, force):
|
|
85
85
|
# Fetch all cases that have requested research
|
86
86
|
case_objs = adapter.cases(research_requested=True)
|
87
87
|
|
88
|
-
default_threshold = 8
|
89
88
|
files = False
|
90
89
|
raise_file_not_found = False
|
91
90
|
for case_obj in case_objs:
|
@@ -107,7 +106,7 @@ def research(case_id, institute, force):
|
|
107
106
|
case_obj=case_obj,
|
108
107
|
variant_type="research",
|
109
108
|
category=ORDERED_FILE_TYPE_MAP[file_type]["category"],
|
110
|
-
rank_treshold=
|
109
|
+
rank_treshold=case_obj.get("rank_score_threshold", DEFAULT_RANK_THRESHOLD),
|
111
110
|
)
|
112
111
|
|
113
112
|
if not files:
|
@@ -7,6 +7,7 @@ import click
|
|
7
7
|
from scout.server.extensions import store
|
8
8
|
|
9
9
|
UPDATE_DICT = {
|
10
|
+
"assembly_alignment_path": "path",
|
10
11
|
"bam_file": "path",
|
11
12
|
"bionano_access.sample": "str",
|
12
13
|
"bionano_access.project": "str",
|
@@ -15,7 +16,9 @@ UPDATE_DICT = {
|
|
15
16
|
"chromograph_images.coverage": "str",
|
16
17
|
"chromograph_images.upd_regions": "str",
|
17
18
|
"chromograph_images.upd_sites": "str",
|
19
|
+
"minor_allele_frequency_wig": "path",
|
18
20
|
"mt_bam": "path",
|
21
|
+
"paraphase_alignment_path": "path",
|
19
22
|
"reviewer.alignment": "path",
|
20
23
|
"reviewer.alignment_index": "path",
|
21
24
|
"reviewer.vcf": "path",
|
@@ -5,6 +5,7 @@ import logging
|
|
5
5
|
import click
|
6
6
|
from flask.cli import current_app, with_appcontext
|
7
7
|
|
8
|
+
from scout.constants.panels import PANELAPPGREEN_DISPLAY_NAME, PANELAPPGREEN_NAME
|
8
9
|
from scout.load.panelapp import load_panelapp_green_panel
|
9
10
|
from scout.server.extensions import store
|
10
11
|
|
@@ -31,8 +32,15 @@ LOG = logging.getLogger(__name__)
|
|
31
32
|
is_flag=True,
|
32
33
|
help="Force update even if updated panel contains less genes",
|
33
34
|
)
|
35
|
+
@click.option("--panel-id", help="Panel ID", default=PANELAPPGREEN_NAME, show_default=True)
|
36
|
+
@click.option(
|
37
|
+
"--panel-display-name",
|
38
|
+
help="Panel display name",
|
39
|
+
default=PANELAPPGREEN_DISPLAY_NAME,
|
40
|
+
show_default=True,
|
41
|
+
)
|
34
42
|
@with_appcontext
|
35
|
-
def panelapp_green(institute, force, signed_off):
|
43
|
+
def panelapp_green(institute, force, signed_off, panel_id, panel_display_name):
|
36
44
|
"""
|
37
45
|
Update the automatically generated PanelApp Green Genes panel in the database.
|
38
46
|
"""
|
@@ -47,7 +55,12 @@ def panelapp_green(institute, force, signed_off):
|
|
47
55
|
|
48
56
|
try:
|
49
57
|
load_panelapp_green_panel(
|
50
|
-
adapter=store,
|
58
|
+
adapter=store,
|
59
|
+
institute=institute,
|
60
|
+
force=force,
|
61
|
+
signed_off=signed_off,
|
62
|
+
panel_id=panel_id,
|
63
|
+
panel_display_name=panel_display_name,
|
51
64
|
)
|
52
65
|
except Exception as err:
|
53
66
|
LOG.error(err)
|
scout/constants/__init__.py
CHANGED
@@ -37,7 +37,7 @@ from .clinvar import (
|
|
37
37
|
GERMLINE_CLASSIF_TERMS,
|
38
38
|
MULTIPLE_CONDITION_EXPLANATION,
|
39
39
|
)
|
40
|
-
from .clnsig import CLINSIG_MAP, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
|
40
|
+
from .clnsig import CLINSIG_MAP, ONC_CLNSIG, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
|
41
41
|
from .disease_parsing import (
|
42
42
|
DISEASE_INHERITANCE_TERMS,
|
43
43
|
ENTRY_PATTERN,
|
@@ -45,7 +45,11 @@ from .disease_parsing import (
|
|
45
45
|
MIMNR_PATTERN,
|
46
46
|
OMIM_STATUS_MAP,
|
47
47
|
)
|
48
|
-
from .file_types import
|
48
|
+
from .file_types import (
|
49
|
+
DNA_SAMPLE_VARIANT_CATEGORIES,
|
50
|
+
ORDERED_FILE_TYPE_MAP,
|
51
|
+
ORDERED_OMICS_FILE_TYPE_MAP,
|
52
|
+
)
|
49
53
|
from .filters import (
|
50
54
|
CLINICAL_FILTER_BASE,
|
51
55
|
CLINICAL_FILTER_BASE_CANCER,
|