scout-browser 4.99.0__py3-none-any.whl → 4.100.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. scout/adapter/mongo/case.py +30 -15
  2. scout/adapter/mongo/clinvar.py +23 -31
  3. scout/adapter/mongo/event.py +14 -4
  4. scout/adapter/mongo/omics_variant.py +14 -1
  5. scout/adapter/mongo/query.py +24 -1
  6. scout/adapter/mongo/variant.py +37 -19
  7. scout/adapter/mongo/variant_loader.py +159 -176
  8. scout/build/individual.py +3 -1
  9. scout/commands/download/ensembl.py +1 -2
  10. scout/commands/load/research.py +2 -3
  11. scout/commands/update/individual.py +1 -0
  12. scout/constants/__init__.py +7 -2
  13. scout/constants/igv_tracks.py +4 -3
  14. scout/constants/indexes.py +5 -4
  15. scout/constants/query_terms.py +1 -0
  16. scout/models/case/case.py +1 -0
  17. scout/models/case/case_loading_models.py +3 -1
  18. scout/parse/ensembl.py +8 -3
  19. scout/server/app.py +6 -0
  20. scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +10 -0
  21. scout/server/blueprints/cases/controllers.py +9 -3
  22. scout/server/blueprints/cases/templates/cases/case_report.html +25 -13
  23. scout/server/blueprints/cases/templates/cases/chanjo2_form.html +1 -1
  24. scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
  25. scout/server/blueprints/cases/templates/cases/gene_panel.html +1 -1
  26. scout/server/blueprints/cases/templates/cases/utils.html +19 -0
  27. scout/server/blueprints/clinvar/controllers.py +5 -1
  28. scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +34 -12
  29. scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +1 -1
  30. scout/server/blueprints/diagnoses/static/diagnoses.js +8 -1
  31. scout/server/blueprints/institutes/static/variants_list_scripts.js +9 -1
  32. scout/server/blueprints/institutes/templates/overview/institute_sidebar.html +9 -1
  33. scout/server/blueprints/mme/__init__.py +1 -0
  34. scout/server/blueprints/mme/controllers.py +18 -0
  35. scout/server/blueprints/mme/templates/mme/mme_submissions.html +153 -0
  36. scout/server/blueprints/mme/views.py +34 -0
  37. scout/server/blueprints/panels/templates/panels/panel.html +19 -6
  38. scout/server/blueprints/phenotypes/templates/phenotypes/hpo_terms.html +8 -1
  39. scout/server/blueprints/variant/controllers.py +19 -10
  40. scout/server/blueprints/variant/templates/variant/acmg.html +9 -0
  41. scout/server/blueprints/variant/templates/variant/cancer-variant.html +1 -1
  42. scout/server/blueprints/variant/templates/variant/components.html +19 -16
  43. scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
  44. scout/server/blueprints/variant/templates/variant/utils.html +20 -8
  45. scout/server/blueprints/variant/templates/variant/variant.html +42 -1
  46. scout/server/blueprints/variant/views.py +12 -0
  47. scout/server/blueprints/variants/controllers.py +17 -9
  48. scout/server/blueprints/variants/forms.py +8 -3
  49. scout/server/blueprints/variants/templates/variants/components.html +8 -2
  50. scout/server/blueprints/variants/templates/variants/indicators.html +11 -13
  51. scout/server/blueprints/variants/templates/variants/utils.html +27 -22
  52. scout/server/extensions/bionano_extension.py +0 -1
  53. scout/server/extensions/chanjo2_extension.py +54 -13
  54. scout/server/links.py +15 -0
  55. scout/server/static/bs_styles.css +34 -6
  56. scout/server/templates/utils.html +9 -10
  57. scout/server/utils.py +18 -0
  58. scout/utils/ensembl_biomart_clients.py +1 -0
  59. scout/utils/scout_requests.py +1 -3
  60. {scout_browser-4.99.0.dist-info → scout_browser-4.100.1.dist-info}/METADATA +1 -1
  61. {scout_browser-4.99.0.dist-info → scout_browser-4.100.1.dist-info}/RECORD +64 -60
  62. {scout_browser-4.99.0.dist-info → scout_browser-4.100.1.dist-info}/WHEEL +0 -0
  63. {scout_browser-4.99.0.dist-info → scout_browser-4.100.1.dist-info}/entry_points.txt +0 -0
  64. {scout_browser-4.99.0.dist-info → scout_browser-4.100.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,12 +1,15 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # stdlib modules
3
3
  import logging
4
+ import sys
4
5
  from datetime import datetime
6
+ from typing import Dict, Iterable, Optional
5
7
 
6
8
  import cyvcf2
7
9
 
8
10
  # Third party modules
9
11
  import pymongo
12
+ from click import progressbar
10
13
  from cyvcf2 import VCF, Variant
11
14
  from intervaltree import IntervalTree
12
15
  from pymongo.errors import BulkWriteError, DuplicateKeyError
@@ -349,54 +352,35 @@ class VariantLoader(object):
349
352
 
350
353
  def _load_variants(
351
354
  self,
352
- variants,
353
- variant_type,
354
- case_obj,
355
- individual_positions,
356
- rank_threshold,
357
- institute_id,
358
- build=None,
359
- rank_results_header=None,
360
- vep_header=None,
361
- category="snv",
362
- sample_info=None,
363
- custom_images=None,
364
- local_archive_info=None,
365
- gene_to_panels=None,
366
- hgncid_to_gene=None,
367
- genomic_intervals=None,
368
- ):
369
- """Perform the loading of variants
370
-
371
- This is the function that loops over the variants, parse them and build the variant
355
+ variants: Iterable[cyvcf2.Variant],
356
+ nr_variants: int,
357
+ variant_type: str,
358
+ case_obj: dict,
359
+ individual_positions: dict,
360
+ rank_threshold: int,
361
+ institute_id: str,
362
+ build: Optional[str] = None,
363
+ rank_results_header: Optional[list] = None,
364
+ vep_header: Optional[list] = None,
365
+ category: str = "snv",
366
+ sample_info: Optional[dict] = None,
367
+ custom_images: Optional[dict] = None,
368
+ local_archive_info: Optional[dict] = None,
369
+ gene_to_panels: Optional[Dict[str, set]] = None,
370
+ hgncid_to_gene: Optional[Dict[int, dict]] = None,
371
+ genomic_intervals: Optional[Dict[str, IntervalTree]] = None,
372
+ ) -> int:
373
+ """This is the function that loops over the variants, parses them and builds the variant
372
374
  objects so they are ready to be inserted into the database.
373
-
374
- Args:
375
- variants(iterable(cyvcf2.Variant))
376
- variant_type(str): ['clinical', 'research']
377
- case_obj(dict)
378
- individual_positions(dict): How individuals are positioned in vcf
379
- rank_treshold(int): Only load variants with a rank score > than this
380
- institute_id(str)
381
- build(str): Genome build
382
- rank_results_header(list): Rank score categories
383
- vep_header(list)
384
- category(str): ['snv','sv','cancer','str']
385
- sample_info(dict): A dictionary with info about samples.
386
- Strictly for cancer to tell which is tumor
387
- custom_images(dict): A dict with custom images for a case.
388
- local_archive_info(dict): A dict with info about the local archive used for annotation
389
-
390
- Returns:
391
- nr_inserted(int)
375
+ All variants with rank score above rank_threshold are loaded. All MT, pathogenic, managed or variants causative in other cases are also loaded.
376
+ individual_positions refers to the order of samples in the VCF file. sample_info contains info about samples. It is used for instance to define tumor samples in cancer cases.
377
+ local_archive_info contains info about the local archive used for annotation.
392
378
  """
393
379
  build = build or "37"
394
380
 
395
- LOG.info("Start inserting {0} {1} variants into database".format(variant_type, category))
396
381
  start_insertion = datetime.now()
397
382
  start_five_thousand = datetime.now()
398
- # These are the number of parsed varaints
399
- nr_variants = 0
383
+
400
384
  # These are the number of variants that meet the criteria and gets inserted
401
385
  nr_inserted = 0
402
386
  # This is to keep track of blocks of inserted variants
@@ -408,123 +392,131 @@ class VariantLoader(object):
408
392
  bulk = {}
409
393
  current_region = None
410
394
 
411
- for nr_variants, variant in enumerate(variants):
412
- # All MT variants are loaded
413
- mt_variant = "MT" in variant.CHROM
414
- rank_score = parse_rank_score(variant.INFO.get("RankScore"), case_obj["_id"])
415
- pathogenic = is_pathogenic(variant)
416
- managed = self._is_managed(variant, category)
417
- causative = self._is_causative_other_cases(variant, category)
418
-
419
- # Check if the variant should be loaded at all
420
- # if rank score is None means there are no rank scores annotated, all variants will be loaded
421
- # Otherwise we load all variants above a rank score treshold
422
- # Except for MT variants where we load all variants
423
- if (
424
- (rank_score is None)
425
- or (rank_score > rank_threshold)
426
- or mt_variant
427
- or pathogenic
428
- or causative
429
- or managed
430
- or category in ["str"]
431
- ):
432
- nr_inserted += 1
433
- # Parse the vcf variant
434
- parsed_variant = parse_variant(
435
- variant=variant,
436
- case=case_obj,
437
- variant_type=variant_type,
438
- rank_results_header=rank_results_header,
439
- vep_header=vep_header,
440
- individual_positions=individual_positions,
441
- category=category,
442
- local_archive_info=local_archive_info,
443
- )
444
-
445
- # Build the variant object
446
- variant_obj = build_variant(
447
- variant=parsed_variant,
448
- institute_id=institute_id,
449
- gene_to_panels=gene_to_panels,
450
- hgncid_to_gene=hgncid_to_gene,
451
- sample_info=sample_info,
452
- )
453
-
454
- # Check if the variant is in a genomic region
455
- var_chrom = variant_obj["chromosome"]
456
- var_start = variant_obj["position"]
457
- # We need to make sure that the interval has a length > 0
458
- var_end = variant_obj["end"] + 1
459
- var_id = variant_obj["_id"]
460
- # If the bulk should be loaded or not
461
- load = True
462
- new_region = None
463
-
464
- intervals = genomic_intervals.get(var_chrom, IntervalTree())
465
- genomic_regions = intervals.overlap(var_start, var_end)
466
-
467
- # If the variant is in a coding region
468
- if genomic_regions:
469
- # We know there is data here so get the interval id
470
- new_region = genomic_regions.pop().data
471
- # If the variant is in the same region as previous
472
- # we add it to the same bulk
473
- if new_region == current_region:
474
- load = False
475
-
476
- # This is the case where the variant is intergenic
477
- else:
478
- # If the previous variant was also intergenic we add the variant to the bulk
479
- if not current_region:
480
- load = False
481
- # We need to have a max size of the bulk
482
- if len(bulk) > 10000:
483
- load = True
484
- # Associate variant with image
485
- if custom_images:
486
- images = [
487
- img for img in custom_images if img["str_repid"] == variant_obj["str_repid"]
488
- ]
489
- if len(images) > 0:
490
- variant_obj["custom_images"] = images
491
- # Load the variant object
492
- if load:
493
- # If the variant bulk contains coding variants we want to update the compounds
494
- if current_region:
495
- self.update_compounds(bulk)
496
- try:
497
- # Load the variants
498
- self.load_variant_bulk(list(bulk.values()))
499
- nr_bulks += 1
500
- except IntegrityError as error:
501
- pass
502
- bulk = {}
503
-
504
- current_region = new_region
505
- if var_id in bulk:
506
- LOG.warning(
507
- "Duplicated variant %s detected in same bulk. Attempting separate upsert.",
508
- variant_obj.get("simple_id"),
395
+ LOG.info(f"Number of variants present on the VCF file:{nr_variants}")
396
+ with progressbar(
397
+ variants, label="Loading variants", length=nr_variants, file=sys.stdout
398
+ ) as bar:
399
+ for idx, variant in enumerate(bar):
400
+ # All MT variants are loaded
401
+ mt_variant = variant.CHROM in ["M", "MT"]
402
+ rank_score = parse_rank_score(variant.INFO.get("RankScore"), case_obj["_id"])
403
+ pathogenic = is_pathogenic(variant)
404
+ managed = self._is_managed(variant, category)
405
+ causative = self._is_causative_other_cases(variant, category)
406
+
407
+ # Check if the variant should be loaded at all
408
+ # if rank score is None means there are no rank scores annotated, all variants will be loaded
409
+ # Otherwise we load all variants above a rank score treshold
410
+ # Except for MT variants where we load all variants
411
+ if (
412
+ (rank_score is None)
413
+ or (rank_score > rank_threshold)
414
+ or mt_variant
415
+ or pathogenic
416
+ or causative
417
+ or managed
418
+ or category in ["str"]
419
+ ):
420
+ nr_inserted += 1
421
+ # Parse the vcf variant
422
+ parsed_variant = parse_variant(
423
+ variant=variant,
424
+ case=case_obj,
425
+ variant_type=variant_type,
426
+ rank_results_header=rank_results_header,
427
+ vep_header=vep_header,
428
+ individual_positions=individual_positions,
429
+ category=category,
430
+ local_archive_info=local_archive_info,
509
431
  )
510
- try:
511
- self.upsert_variant(variant_obj)
512
- except IntegrityError as err:
513
- pass
514
- else:
515
- bulk[var_id] = variant_obj
516
-
517
- if nr_variants != 0 and nr_variants % 5000 == 0:
518
- LOG.info("%s variants parsed", str(nr_variants))
519
- LOG.info(
520
- "Time to parse variants: %s",
521
- (datetime.now() - start_five_thousand),
432
+
433
+ # Build the variant object
434
+ variant_obj = build_variant(
435
+ variant=parsed_variant,
436
+ institute_id=institute_id,
437
+ gene_to_panels=gene_to_panels,
438
+ hgncid_to_gene=hgncid_to_gene,
439
+ sample_info=sample_info,
522
440
  )
523
- start_five_thousand = datetime.now()
524
441
 
525
- if nr_inserted != 0 and (nr_inserted * inserted) % (1000 * inserted) == 0:
526
- LOG.info("%s variants inserted", nr_inserted)
527
- inserted += 1
442
+ # Check if the variant is in a genomic region
443
+ var_chrom = variant_obj["chromosome"]
444
+ var_start = variant_obj["position"]
445
+ # We need to make sure that the interval has a length > 0
446
+ var_end = variant_obj["end"] + 1
447
+ var_id = variant_obj["_id"]
448
+ # If the bulk should be loaded or not
449
+ load = True
450
+ new_region = None
451
+
452
+ intervals = genomic_intervals.get(var_chrom, IntervalTree())
453
+ genomic_regions = intervals.overlap(var_start, var_end)
454
+
455
+ # If the variant is in a coding region
456
+ if genomic_regions:
457
+ # We know there is data here so get the interval id
458
+ new_region = genomic_regions.pop().data
459
+ # If the variant is in the same region as previous
460
+ # we add it to the same bulk
461
+ if new_region == current_region:
462
+ load = False
463
+
464
+ # This is the case where the variant is intergenic
465
+ else:
466
+ # If the previous variant was also intergenic we add the variant to the bulk
467
+ if not current_region:
468
+ load = False
469
+ # We need to have a max size of the bulk
470
+ if len(bulk) > 10000:
471
+ load = True
472
+ # Associate variant with image
473
+ if custom_images:
474
+ images = [
475
+ img
476
+ for img in custom_images
477
+ if img["str_repid"] == variant_obj["str_repid"]
478
+ ]
479
+ if len(images) > 0:
480
+ variant_obj["custom_images"] = images
481
+
482
+ # Load the variant object
483
+ if load:
484
+ # If the variant bulk contains coding variants we want to update the compounds
485
+ if current_region:
486
+ self.update_compounds(bulk)
487
+ try:
488
+ # Load the variants
489
+ self.load_variant_bulk(list(bulk.values()))
490
+ nr_bulks += 1
491
+ except IntegrityError as error:
492
+ pass
493
+ bulk = {}
494
+
495
+ current_region = new_region
496
+ if var_id in bulk:
497
+ LOG.warning(
498
+ "Duplicated variant %s detected in same bulk. Attempting separate upsert.",
499
+ variant_obj.get("simple_id"),
500
+ )
501
+ try:
502
+ self.upsert_variant(variant_obj)
503
+ except IntegrityError as err:
504
+ pass
505
+ else:
506
+ bulk[var_id] = variant_obj
507
+
508
+ if nr_variants != 0 and nr_variants % 5000 == 0:
509
+ LOG.info("%s variants parsed", str(nr_variants))
510
+ LOG.info(
511
+ "Time to parse variants: %s",
512
+ (datetime.now() - start_five_thousand),
513
+ )
514
+ start_five_thousand = datetime.now()
515
+
516
+ if nr_inserted != 0 and (nr_inserted * inserted) % (1000 * inserted) == 0:
517
+ LOG.info("%s variants inserted", nr_inserted)
518
+ inserted += 1
519
+
528
520
  # If the variants are in a coding region we update the compounds
529
521
  if current_region:
530
522
  self.update_compounds(bulk)
@@ -538,8 +530,6 @@ class VariantLoader(object):
538
530
  )
539
531
  )
540
532
 
541
- if nr_variants:
542
- nr_variants += 1
543
533
  LOG.info("Nr variants parsed: %s", nr_variants)
544
534
  LOG.info("Nr variants inserted: %s", nr_inserted)
545
535
  LOG.debug("Nr bulks inserted: %s", nr_bulks)
@@ -657,30 +647,21 @@ class VariantLoader(object):
657
647
 
658
648
  nr_inserted = 0
659
649
 
660
- variant_files = []
650
+ gene_to_panels = self.gene_to_panels(case_obj)
651
+ genes = list(self.all_genes(build=build))
652
+ hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
653
+ genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
654
+
661
655
  for vcf_file_key, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
662
656
  if vcf_dict["variant_type"] != variant_type:
663
657
  continue
664
658
  if vcf_dict["category"] != category:
665
659
  continue
666
660
 
667
- LOG.debug("Attempt to load %s %s VCF.", variant_type, category.upper())
661
+ LOG.info(f"Loading'{vcf_file_key}' variants")
668
662
  variant_file = case_obj["vcf_files"].get(vcf_file_key)
669
- if variant_file:
670
- variant_files.append(variant_file)
671
-
672
- if not variant_files:
673
- raise SyntaxError(
674
- "VCF files for {} {} does not seem to exist".format(category, variant_type)
675
- )
676
663
 
677
- gene_to_panels = self.gene_to_panels(case_obj)
678
- genes = list(self.all_genes(build=build))
679
- hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
680
- genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
681
-
682
- for variant_file in variant_files:
683
- if not self._has_variants_in_file(variant_file):
664
+ if not variant_file or not self._has_variants_in_file(variant_file):
684
665
  continue
685
666
 
686
667
  vcf_obj = VCF(variant_file)
@@ -722,11 +703,13 @@ class VariantLoader(object):
722
703
  else:
723
704
  rank_threshold = rank_threshold or 0
724
705
 
725
- variants = vcf_obj(region)
706
+ nr_variants = sum(1 for _ in vcf_obj(region))
707
+ vcf_obj = VCF(variant_file)
726
708
 
727
709
  try:
728
710
  nr_inserted = self._load_variants(
729
- variants=variants,
711
+ variants=vcf_obj(region),
712
+ nr_variants=nr_variants,
730
713
  variant_type=variant_type,
731
714
  case_obj=case_obj,
732
715
  individual_positions=individual_positions,
scout/build/individual.py CHANGED
@@ -9,6 +9,7 @@ BUILD_INDIVIDUAL_FILES = [
9
9
  "assembly_alignment_path",
10
10
  "bam_file",
11
11
  "d4_file",
12
+ "minor_allele_frequency_wig",
12
13
  "mt_bam",
13
14
  "paraphase_alignment_path",
14
15
  "rhocall_bed",
@@ -41,9 +42,10 @@ def build_individual(ind: dict) -> dict:
41
42
  mother = str, # Individual id of mother
42
43
  capture_kits = list, # List of names of capture kits
43
44
  bam_file = str, # Path to bam file,
45
+ minor_allele_frequency_wig = str, # Path to a HiFiCNV MAF wig
44
46
  rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
45
47
  rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
46
- tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
48
+ tiddit_coverage_wig = str, # Path to a TIDDIT or HiFiCNV coverage wig - overview coverage
47
49
  upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
48
50
  upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
49
51
  vcf2cytosure = str, # Path to CGH file
@@ -6,9 +6,8 @@ from typing import List, Optional
6
6
 
7
7
  import click
8
8
 
9
- from scout.utils.ensembl_biomart_clients import EnsemblBiomartHandler
9
+ from scout.utils.ensembl_biomart_clients import CHROM_SEPARATOR, EnsemblBiomartHandler
10
10
 
11
- CHROM_SEPARATOR = "[success]"
12
11
  NR_EXPECTED_CHROMS = 24
13
12
 
14
13
  LOG = logging.getLogger(__name__)
@@ -10,6 +10,7 @@ from scout.adapter import MongoAdapter
10
10
  from scout.constants import ORDERED_FILE_TYPE_MAP
11
11
  from scout.server.extensions import store
12
12
 
13
+ DEFAULT_RANK_THRESHOLD = 8
13
14
  LOG = logging.getLogger(__name__)
14
15
 
15
16
 
@@ -23,7 +24,6 @@ def upload_research_variants(
23
24
  """Delete existing variants and upload new variants"""
24
25
  adapter.delete_variants(case_id=case_obj["_id"], variant_type=variant_type, category=category)
25
26
 
26
- LOG.info("Load %s %s for: %s", variant_type, category.upper(), case_obj["_id"])
27
27
  adapter.load_variants(
28
28
  case_obj=case_obj,
29
29
  variant_type=variant_type,
@@ -85,7 +85,6 @@ def research(case_id, institute, force):
85
85
  # Fetch all cases that have requested research
86
86
  case_objs = adapter.cases(research_requested=True)
87
87
 
88
- default_threshold = 8
89
88
  files = False
90
89
  raise_file_not_found = False
91
90
  for case_obj in case_objs:
@@ -107,7 +106,7 @@ def research(case_id, institute, force):
107
106
  case_obj=case_obj,
108
107
  variant_type="research",
109
108
  category=ORDERED_FILE_TYPE_MAP[file_type]["category"],
110
- rank_treshold=default_threshold,
109
+ rank_treshold=case_obj.get("rank_score_threshold", DEFAULT_RANK_THRESHOLD),
111
110
  )
112
111
 
113
112
  if not files:
@@ -16,6 +16,7 @@ UPDATE_DICT = {
16
16
  "chromograph_images.coverage": "str",
17
17
  "chromograph_images.upd_regions": "str",
18
18
  "chromograph_images.upd_sites": "str",
19
+ "minor_allele_frequency_wig": "path",
19
20
  "mt_bam": "path",
20
21
  "paraphase_alignment_path": "path",
21
22
  "reviewer.alignment": "path",
@@ -37,7 +37,7 @@ from .clinvar import (
37
37
  GERMLINE_CLASSIF_TERMS,
38
38
  MULTIPLE_CONDITION_EXPLANATION,
39
39
  )
40
- from .clnsig import CLINSIG_MAP, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
40
+ from .clnsig import CLINSIG_MAP, ONC_CLNSIG, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
41
41
  from .disease_parsing import (
42
42
  DISEASE_INHERITANCE_TERMS,
43
43
  ENTRY_PATTERN,
@@ -68,7 +68,12 @@ from .gene_tags import (
68
68
  PANEL_GENE_INFO_TRANSCRIPTS,
69
69
  UPDATE_GENES_RESOURCES,
70
70
  )
71
- from .igv_tracks import CASE_SPECIFIC_TRACKS, HUMAN_REFERENCE, IGV_TRACKS, USER_DEFAULT_TRACKS
71
+ from .igv_tracks import (
72
+ CASE_SPECIFIC_TRACKS,
73
+ HUMAN_REFERENCE,
74
+ IGV_TRACKS,
75
+ USER_DEFAULT_TRACKS,
76
+ )
72
77
  from .indexes import ID_PROJECTION, INDEXES
73
78
  from .panels import PANELAPP_CONFIDENCE_EXCLUDE
74
79
  from .phenotype import (
@@ -126,11 +126,12 @@ HUMAN_GENES_38 = {
126
126
  CASE_SPECIFIC_TRACKS = {
127
127
  "paraphase_alignments": "Paraphase Alignment",
128
128
  "assembly_alignments": "de novo Assembly Alignment",
129
+ "minor_allele_frequency_wigs": "SV Caller Minor Allele Frequency",
129
130
  "rhocall_beds": "Rhocall Zygosity",
130
131
  "rhocall_wigs": "Rhocall Regions",
131
- "tiddit_coverage_wigs": "TIDDIT Coverage",
132
- "upd_regions_beds": "UPD regions",
133
- "upd_sites_beds": "UPD sites",
132
+ "tiddit_coverage_wigs": "SV Caller Coverage",
133
+ "upd_regions_beds": "UPD Regions",
134
+ "upd_sites_beds": "UPD Sites",
134
135
  }
135
136
 
136
137
  HUMAN_REFERENCE = {"37": HUMAN_REFERENCE_37, "38": HUMAN_REFERENCE_38}
@@ -99,20 +99,21 @@ INDEXES = {
99
99
  ("category", ASCENDING),
100
100
  ("case_id", ASCENDING),
101
101
  ("variant_type", ASCENDING),
102
- ("rank_score", ASCENDING),
102
+ ("rank_score", DESCENDING),
103
+ ("hgnc_ids", ASCENDING),
103
104
  ],
104
- name="category_caseid_varianttype_rankscore",
105
+ name="category_caseid_varianttype_rankscore_hgncids",
105
106
  ),
106
107
  IndexModel(
107
108
  [
109
+ ("chromosome", ASCENDING),
108
110
  ("case_id", ASCENDING),
109
111
  ("category", ASCENDING),
110
112
  ("variant_type", ASCENDING),
111
- ("chromosome", ASCENDING),
112
113
  ("start", ASCENDING),
113
114
  ("end", ASCENDING),
114
115
  ],
115
- name="caseid_category_chromosome_start_end",
116
+ name="chromosome_caseid_category_start_end",
116
117
  ),
117
118
  IndexModel(
118
119
  [("variant_id", ASCENDING), ("institute", ASCENDING)],
@@ -62,4 +62,5 @@ SECONDARY_CRITERIA = [
62
62
  "split_reads",
63
63
  "fusion_caller",
64
64
  "rank_score",
65
+ "clinsig_onc",
65
66
  ]
scout/models/case/case.py CHANGED
@@ -14,6 +14,7 @@ individual = dict(
14
14
  mother=str, # Individual id of mother
15
15
  capture_kits=list, # List of names of capture kits
16
16
  bam_file=str, # Path to bam file
17
+ minor_allele_frequency_wig=str, # Path to wig file
17
18
  rhocall_bed=str, # Path to bed file
18
19
  rhocall_wig=str, # Path to wig file
19
20
  tiddit_coverage_wig=str, # Path to wig file
@@ -28,6 +28,7 @@ SAMPLES_FILE_PATH_CHECKS = [
28
28
  "assembly_alignment_path",
29
29
  "bam_file",
30
30
  "d4_file",
31
+ "minor_allele_frequency_wig",
31
32
  "mitodel_file",
32
33
  "paraphase_alignment_path",
33
34
  "rhocall_bed",
@@ -219,6 +220,7 @@ class SampleLoader(BaseModel):
219
220
  individual_id: str = Field(alias="sample_id")
220
221
  is_sma: Optional[str] = None
221
222
  is_sma_carrier: Optional[str] = None
223
+ minor_allele_frequency_wig: Optional[str] = None
222
224
  mitodel_file: Optional[str] = None
223
225
  mitodel: Optional[Mitodel] = Mitodel()
224
226
  mother: Optional[str] = None
@@ -241,7 +243,7 @@ class SampleLoader(BaseModel):
241
243
  smn_27134_cn: Optional[int] = None
242
244
  splice_junctions_bed: Optional[str] = None
243
245
  subject_id: Optional[str] = None
244
- tiddit_coverage_wig: Optional[str] = None
246
+ tiddit_coverage_wig: Optional[str] = Field(None, alias="coverage_wig")
245
247
  tissue_type: Optional[str] = None
246
248
  tmb: Optional[str] = None
247
249
  tumor_purity: Optional[float] = 0.0
scout/parse/ensembl.py CHANGED
@@ -3,6 +3,8 @@
3
3
  import logging
4
4
  from typing import Any, Dict, List
5
5
 
6
+ from scout.utils.ensembl_biomart_clients import CHROM_SEPARATOR
7
+
6
8
  LOG = logging.getLogger(__name__)
7
9
 
8
10
 
@@ -120,8 +122,8 @@ def parse_ensembl_genes(lines):
120
122
  if index == 0:
121
123
  header = line.rstrip().split("\t")
122
124
  continue
123
- # After that each line represents a gene
124
-
125
+ elif line == CHROM_SEPARATOR:
126
+ continue
125
127
  yield parse_ensembl_line(line, header)
126
128
 
127
129
 
@@ -143,7 +145,8 @@ def parse_ensembl_transcripts(lines):
143
145
  # File allways start with a header line
144
146
  if index == 0:
145
147
  header = line.rstrip().split("\t")
146
- # After that each line represents a transcript
148
+ elif line == CHROM_SEPARATOR:
149
+ continue
147
150
  else:
148
151
  yield parse_ensembl_line(line, header)
149
152
 
@@ -165,6 +168,8 @@ def parse_ensembl_exons(lines):
165
168
  if index == 0:
166
169
  header = line.rstrip().split("\t")
167
170
  continue
171
+ elif line == CHROM_SEPARATOR:
172
+ continue
168
173
 
169
174
  exon_info = parse_ensembl_line(line, header)
170
175
 
scout/server/app.py CHANGED
@@ -29,6 +29,7 @@ from .blueprints import (
29
29
  institutes,
30
30
  login,
31
31
  managed_variants,
32
+ mme,
32
33
  omics_variants,
33
34
  panels,
34
35
  phenomodels,
@@ -116,6 +117,10 @@ def configure_extensions(app):
116
117
  extensions.chanjo_report.init_app(app)
117
118
  LOG.info("Chanjo extension enabled")
118
119
 
120
+ if app.config.get("CHANJO2_URL"):
121
+ LOG.info("Chanjo2 extension enabled")
122
+ extensions.chanjo2.init_app(app)
123
+
119
124
  if app.config.get("LOQUSDB_SETTINGS"):
120
125
  LOG.info("LoqusDB enabled")
121
126
  # setup LoqusDB
@@ -185,6 +190,7 @@ def register_blueprints(app):
185
190
  app.register_blueprint(genes.genes_bp)
186
191
  app.register_blueprint(cases.cases_bp)
187
192
  app.register_blueprint(clinvar.clinvar_bp)
193
+ app.register_blueprint(mme.mme_bp)
188
194
  app.register_blueprint(login.login_bp)
189
195
  app.register_blueprint(variant.variant_bp)
190
196
  app.register_blueprint(variants.variants_bp)