scout-browser 4.98.0__py3-none-any.whl → 4.100.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. scout/adapter/mongo/case.py +30 -15
  2. scout/adapter/mongo/clinvar.py +23 -31
  3. scout/adapter/mongo/event.py +14 -4
  4. scout/adapter/mongo/institute.py +42 -55
  5. scout/adapter/mongo/omics_variant.py +14 -1
  6. scout/adapter/mongo/query.py +24 -1
  7. scout/adapter/mongo/variant.py +44 -22
  8. scout/adapter/mongo/variant_loader.py +169 -186
  9. scout/build/individual.py +5 -1
  10. scout/build/variant/variant.py +8 -0
  11. scout/commands/download/ensembl.py +18 -3
  12. scout/commands/load/research.py +2 -3
  13. scout/commands/update/individual.py +3 -0
  14. scout/commands/update/panelapp.py +15 -2
  15. scout/constants/__init__.py +6 -2
  16. scout/constants/clnsig.py +2 -0
  17. scout/constants/file_types.py +12 -0
  18. scout/constants/igv_tracks.py +9 -6
  19. scout/constants/indexes.py +5 -4
  20. scout/constants/panels.py +3 -0
  21. scout/constants/query_terms.py +1 -0
  22. scout/constants/variant_tags.py +6 -6
  23. scout/demo/643594.config.yaml +1 -0
  24. scout/load/panelapp.py +11 -5
  25. scout/models/case/case.py +1 -0
  26. scout/models/case/case_loading_models.py +7 -1
  27. scout/parse/ensembl.py +8 -3
  28. scout/parse/variant/clnsig.py +38 -0
  29. scout/parse/variant/genotype.py +4 -10
  30. scout/parse/variant/models.py +5 -11
  31. scout/parse/variant/rank_score.py +5 -13
  32. scout/parse/variant/variant.py +90 -111
  33. scout/server/app.py +39 -22
  34. scout/server/blueprints/alignviewers/controllers.py +29 -10
  35. scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +51 -11
  36. scout/server/blueprints/cases/controllers.py +9 -3
  37. scout/server/blueprints/cases/templates/cases/case_report.html +25 -13
  38. scout/server/blueprints/cases/templates/cases/chanjo2_form.html +1 -1
  39. scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
  40. scout/server/blueprints/cases/templates/cases/gene_panel.html +1 -1
  41. scout/server/blueprints/cases/templates/cases/utils.html +25 -6
  42. scout/server/blueprints/clinvar/controllers.py +34 -15
  43. scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +34 -12
  44. scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +14 -5
  45. scout/server/blueprints/clinvar/views.py +14 -2
  46. scout/server/blueprints/diagnoses/static/diagnoses.js +8 -1
  47. scout/server/blueprints/institutes/controllers.py +10 -2
  48. scout/server/blueprints/institutes/static/variants_list_scripts.js +9 -1
  49. scout/server/blueprints/institutes/templates/overview/institute_sidebar.html +9 -1
  50. scout/server/blueprints/login/controllers.py +112 -12
  51. scout/server/blueprints/login/views.py +38 -60
  52. scout/server/blueprints/mme/__init__.py +1 -0
  53. scout/server/blueprints/mme/controllers.py +18 -0
  54. scout/server/blueprints/mme/templates/mme/mme_submissions.html +153 -0
  55. scout/server/blueprints/mme/views.py +34 -0
  56. scout/server/blueprints/panels/templates/panels/panel.html +19 -6
  57. scout/server/blueprints/phenotypes/templates/phenotypes/hpo_terms.html +8 -1
  58. scout/server/blueprints/public/templates/public/index.html +5 -1
  59. scout/server/blueprints/variant/controllers.py +19 -10
  60. scout/server/blueprints/variant/templates/variant/acmg.html +15 -2
  61. scout/server/blueprints/variant/templates/variant/cancer-variant.html +1 -1
  62. scout/server/blueprints/variant/templates/variant/components.html +38 -16
  63. scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
  64. scout/server/blueprints/variant/templates/variant/utils.html +23 -11
  65. scout/server/blueprints/variant/templates/variant/variant.html +42 -1
  66. scout/server/blueprints/variant/views.py +12 -0
  67. scout/server/blueprints/variants/controllers.py +20 -3
  68. scout/server/blueprints/variants/forms.py +8 -3
  69. scout/server/blueprints/variants/templates/variants/components.html +34 -0
  70. scout/server/blueprints/variants/templates/variants/indicators.html +11 -13
  71. scout/server/blueprints/variants/templates/variants/mei-variants.html +8 -6
  72. scout/server/blueprints/variants/templates/variants/sv-variants.html +9 -7
  73. scout/server/blueprints/variants/templates/variants/utils.html +35 -34
  74. scout/server/blueprints/variants/templates/variants/variants.html +4 -25
  75. scout/server/config.py +8 -0
  76. scout/server/extensions/bionano_extension.py +0 -1
  77. scout/server/extensions/chanjo2_extension.py +54 -13
  78. scout/server/links.py +15 -0
  79. scout/server/static/bs_styles.css +34 -6
  80. scout/server/templates/utils.html +9 -10
  81. scout/server/utils.py +40 -5
  82. scout/utils/acmg.py +25 -26
  83. scout/utils/ensembl_biomart_clients.py +2 -1
  84. scout/utils/ensembl_rest_clients.py +25 -32
  85. scout/utils/hgvs.py +1 -1
  86. scout/utils/scout_requests.py +1 -3
  87. {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/METADATA +10 -14
  88. {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/RECORD +91 -87
  89. {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/WHEEL +0 -0
  90. {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/entry_points.txt +0 -0
  91. {scout_browser-4.98.0.dist-info → scout_browser-4.100.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,12 +1,15 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # stdlib modules
3
3
  import logging
4
+ import sys
4
5
  from datetime import datetime
6
+ from typing import Dict, Iterable, Optional
5
7
 
6
8
  import cyvcf2
7
9
 
8
10
  # Third party modules
9
11
  import pymongo
12
+ from click import progressbar
10
13
  from cyvcf2 import VCF, Variant
11
14
  from intervaltree import IntervalTree
12
15
  from pymongo.errors import BulkWriteError, DuplicateKeyError
@@ -349,54 +352,35 @@ class VariantLoader(object):
349
352
 
350
353
  def _load_variants(
351
354
  self,
352
- variants,
353
- variant_type,
354
- case_obj,
355
- individual_positions,
356
- rank_threshold,
357
- institute_id,
358
- build=None,
359
- rank_results_header=None,
360
- vep_header=None,
361
- category="snv",
362
- sample_info=None,
363
- custom_images=None,
364
- local_archive_info=None,
365
- gene_to_panels=None,
366
- hgncid_to_gene=None,
367
- genomic_intervals=None,
368
- ):
369
- """Perform the loading of variants
370
-
371
- This is the function that loops over the variants, parse them and build the variant
355
+ variants: Iterable[cyvcf2.Variant],
356
+ nr_variants: int,
357
+ variant_type: str,
358
+ case_obj: dict,
359
+ individual_positions: dict,
360
+ rank_threshold: int,
361
+ institute_id: str,
362
+ build: Optional[str] = None,
363
+ rank_results_header: Optional[list] = None,
364
+ vep_header: Optional[list] = None,
365
+ category: str = "snv",
366
+ sample_info: Optional[dict] = None,
367
+ custom_images: Optional[dict] = None,
368
+ local_archive_info: Optional[dict] = None,
369
+ gene_to_panels: Optional[Dict[str, set]] = None,
370
+ hgncid_to_gene: Optional[Dict[int, dict]] = None,
371
+ genomic_intervals: Optional[Dict[str, IntervalTree]] = None,
372
+ ) -> int:
373
+ """This is the function that loops over the variants, parses them and builds the variant
372
374
  objects so they are ready to be inserted into the database.
373
-
374
- Args:
375
- variants(iterable(cyvcf2.Variant))
376
- variant_type(str): ['clinical', 'research']
377
- case_obj(dict)
378
- individual_positions(dict): How individuals are positioned in vcf
379
- rank_treshold(int): Only load variants with a rank score > than this
380
- institute_id(str)
381
- build(str): Genome build
382
- rank_results_header(list): Rank score categories
383
- vep_header(list)
384
- category(str): ['snv','sv','cancer','str']
385
- sample_info(dict): A dictionary with info about samples.
386
- Strictly for cancer to tell which is tumor
387
- custom_images(dict): A dict with custom images for a case.
388
- local_archive_info(dict): A dict with info about the local archive used for annotation
389
-
390
- Returns:
391
- nr_inserted(int)
375
+ All variants with rank score above rank_threshold are loaded. All MT, pathogenic, managed or variants causative in other cases are also loaded.
376
+ individual_positions refers to the order of samples in the VCF file. sample_info contains info about samples. It is used for instance to define tumor samples in cancer cases.
377
+ local_archive_info contains info about the local archive used for annotation.
392
378
  """
393
379
  build = build or "37"
394
380
 
395
- LOG.info("Start inserting {0} {1} variants into database".format(variant_type, category))
396
381
  start_insertion = datetime.now()
397
382
  start_five_thousand = datetime.now()
398
- # These are the number of parsed varaints
399
- nr_variants = 0
383
+
400
384
  # These are the number of variants that meet the criteria and gets inserted
401
385
  nr_inserted = 0
402
386
  # This is to keep track of blocks of inserted variants
@@ -408,123 +392,131 @@ class VariantLoader(object):
408
392
  bulk = {}
409
393
  current_region = None
410
394
 
411
- for nr_variants, variant in enumerate(variants):
412
- # All MT variants are loaded
413
- mt_variant = "MT" in variant.CHROM
414
- rank_score = parse_rank_score(variant.INFO.get("RankScore"), case_obj["_id"])
415
- pathogenic = is_pathogenic(variant)
416
- managed = self._is_managed(variant, category)
417
- causative = self._is_causative_other_cases(variant, category)
418
-
419
- # Check if the variant should be loaded at all
420
- # if rank score is None means there are no rank scores annotated, all variants will be loaded
421
- # Otherwise we load all variants above a rank score treshold
422
- # Except for MT variants where we load all variants
423
- if (
424
- (rank_score is None)
425
- or (rank_score > rank_threshold)
426
- or mt_variant
427
- or pathogenic
428
- or causative
429
- or managed
430
- or category in ["str"]
431
- ):
432
- nr_inserted += 1
433
- # Parse the vcf variant
434
- parsed_variant = parse_variant(
435
- variant=variant,
436
- case=case_obj,
437
- variant_type=variant_type,
438
- rank_results_header=rank_results_header,
439
- vep_header=vep_header,
440
- individual_positions=individual_positions,
441
- category=category,
442
- local_archive_info=local_archive_info,
443
- )
444
-
445
- # Build the variant object
446
- variant_obj = build_variant(
447
- variant=parsed_variant,
448
- institute_id=institute_id,
449
- gene_to_panels=gene_to_panels,
450
- hgncid_to_gene=hgncid_to_gene,
451
- sample_info=sample_info,
452
- )
453
-
454
- # Check if the variant is in a genomic region
455
- var_chrom = variant_obj["chromosome"]
456
- var_start = variant_obj["position"]
457
- # We need to make sure that the interval has a length > 0
458
- var_end = variant_obj["end"] + 1
459
- var_id = variant_obj["_id"]
460
- # If the bulk should be loaded or not
461
- load = True
462
- new_region = None
463
-
464
- intervals = genomic_intervals.get(var_chrom, IntervalTree())
465
- genomic_regions = intervals.overlap(var_start, var_end)
466
-
467
- # If the variant is in a coding region
468
- if genomic_regions:
469
- # We know there is data here so get the interval id
470
- new_region = genomic_regions.pop().data
471
- # If the variant is in the same region as previous
472
- # we add it to the same bulk
473
- if new_region == current_region:
474
- load = False
475
-
476
- # This is the case where the variant is intergenic
477
- else:
478
- # If the previous variant was also intergenic we add the variant to the bulk
479
- if not current_region:
480
- load = False
481
- # We need to have a max size of the bulk
482
- if len(bulk) > 10000:
483
- load = True
484
- # Associate variant with image
485
- if custom_images:
486
- images = [
487
- img for img in custom_images if img["str_repid"] == variant_obj["str_repid"]
488
- ]
489
- if len(images) > 0:
490
- variant_obj["custom_images"] = images
491
- # Load the variant object
492
- if load:
493
- # If the variant bulk contains coding variants we want to update the compounds
494
- if current_region:
495
- self.update_compounds(bulk)
496
- try:
497
- # Load the variants
498
- self.load_variant_bulk(list(bulk.values()))
499
- nr_bulks += 1
500
- except IntegrityError as error:
501
- pass
502
- bulk = {}
503
-
504
- current_region = new_region
505
- if var_id in bulk:
506
- LOG.warning(
507
- "Duplicated variant %s detected in same bulk. Attempting separate upsert.",
508
- variant_obj.get("simple_id"),
395
+ LOG.info(f"Number of variants present on the VCF file:{nr_variants}")
396
+ with progressbar(
397
+ variants, label="Loading variants", length=nr_variants, file=sys.stdout
398
+ ) as bar:
399
+ for idx, variant in enumerate(bar):
400
+ # All MT variants are loaded
401
+ mt_variant = variant.CHROM in ["M", "MT"]
402
+ rank_score = parse_rank_score(variant.INFO.get("RankScore"), case_obj["_id"])
403
+ pathogenic = is_pathogenic(variant)
404
+ managed = self._is_managed(variant, category)
405
+ causative = self._is_causative_other_cases(variant, category)
406
+
407
+ # Check if the variant should be loaded at all
408
+ # if rank score is None means there are no rank scores annotated, all variants will be loaded
409
+ # Otherwise we load all variants above a rank score treshold
410
+ # Except for MT variants where we load all variants
411
+ if (
412
+ (rank_score is None)
413
+ or (rank_score > rank_threshold)
414
+ or mt_variant
415
+ or pathogenic
416
+ or causative
417
+ or managed
418
+ or category in ["str"]
419
+ ):
420
+ nr_inserted += 1
421
+ # Parse the vcf variant
422
+ parsed_variant = parse_variant(
423
+ variant=variant,
424
+ case=case_obj,
425
+ variant_type=variant_type,
426
+ rank_results_header=rank_results_header,
427
+ vep_header=vep_header,
428
+ individual_positions=individual_positions,
429
+ category=category,
430
+ local_archive_info=local_archive_info,
509
431
  )
510
- try:
511
- self.upsert_variant(variant_obj)
512
- except IntegrityError as err:
513
- pass
514
- else:
515
- bulk[var_id] = variant_obj
516
-
517
- if nr_variants != 0 and nr_variants % 5000 == 0:
518
- LOG.info("%s variants parsed", str(nr_variants))
519
- LOG.info(
520
- "Time to parse variants: %s",
521
- (datetime.now() - start_five_thousand),
432
+
433
+ # Build the variant object
434
+ variant_obj = build_variant(
435
+ variant=parsed_variant,
436
+ institute_id=institute_id,
437
+ gene_to_panels=gene_to_panels,
438
+ hgncid_to_gene=hgncid_to_gene,
439
+ sample_info=sample_info,
522
440
  )
523
- start_five_thousand = datetime.now()
524
441
 
525
- if nr_inserted != 0 and (nr_inserted * inserted) % (1000 * inserted) == 0:
526
- LOG.info("%s variants inserted", nr_inserted)
527
- inserted += 1
442
+ # Check if the variant is in a genomic region
443
+ var_chrom = variant_obj["chromosome"]
444
+ var_start = variant_obj["position"]
445
+ # We need to make sure that the interval has a length > 0
446
+ var_end = variant_obj["end"] + 1
447
+ var_id = variant_obj["_id"]
448
+ # If the bulk should be loaded or not
449
+ load = True
450
+ new_region = None
451
+
452
+ intervals = genomic_intervals.get(var_chrom, IntervalTree())
453
+ genomic_regions = intervals.overlap(var_start, var_end)
454
+
455
+ # If the variant is in a coding region
456
+ if genomic_regions:
457
+ # We know there is data here so get the interval id
458
+ new_region = genomic_regions.pop().data
459
+ # If the variant is in the same region as previous
460
+ # we add it to the same bulk
461
+ if new_region == current_region:
462
+ load = False
463
+
464
+ # This is the case where the variant is intergenic
465
+ else:
466
+ # If the previous variant was also intergenic we add the variant to the bulk
467
+ if not current_region:
468
+ load = False
469
+ # We need to have a max size of the bulk
470
+ if len(bulk) > 10000:
471
+ load = True
472
+ # Associate variant with image
473
+ if custom_images:
474
+ images = [
475
+ img
476
+ for img in custom_images
477
+ if img["str_repid"] == variant_obj["str_repid"]
478
+ ]
479
+ if len(images) > 0:
480
+ variant_obj["custom_images"] = images
481
+
482
+ # Load the variant object
483
+ if load:
484
+ # If the variant bulk contains coding variants we want to update the compounds
485
+ if current_region:
486
+ self.update_compounds(bulk)
487
+ try:
488
+ # Load the variants
489
+ self.load_variant_bulk(list(bulk.values()))
490
+ nr_bulks += 1
491
+ except IntegrityError as error:
492
+ pass
493
+ bulk = {}
494
+
495
+ current_region = new_region
496
+ if var_id in bulk:
497
+ LOG.warning(
498
+ "Duplicated variant %s detected in same bulk. Attempting separate upsert.",
499
+ variant_obj.get("simple_id"),
500
+ )
501
+ try:
502
+ self.upsert_variant(variant_obj)
503
+ except IntegrityError as err:
504
+ pass
505
+ else:
506
+ bulk[var_id] = variant_obj
507
+
508
+ if nr_variants != 0 and nr_variants % 5000 == 0:
509
+ LOG.info("%s variants parsed", str(nr_variants))
510
+ LOG.info(
511
+ "Time to parse variants: %s",
512
+ (datetime.now() - start_five_thousand),
513
+ )
514
+ start_five_thousand = datetime.now()
515
+
516
+ if nr_inserted != 0 and (nr_inserted * inserted) % (1000 * inserted) == 0:
517
+ LOG.info("%s variants inserted", nr_inserted)
518
+ inserted += 1
519
+
528
520
  # If the variants are in a coding region we update the compounds
529
521
  if current_region:
530
522
  self.update_compounds(bulk)
@@ -538,8 +530,6 @@ class VariantLoader(object):
538
530
  )
539
531
  )
540
532
 
541
- if nr_variants:
542
- nr_variants += 1
543
533
  LOG.info("Nr variants parsed: %s", nr_variants)
544
534
  LOG.info("Nr variants inserted: %s", nr_inserted)
545
535
  LOG.debug("Nr bulks inserted: %s", nr_bulks)
@@ -621,16 +611,16 @@ class VariantLoader(object):
621
611
 
622
612
  def load_variants(
623
613
  self,
624
- case_obj,
625
- variant_type="clinical",
626
- category="snv",
627
- rank_threshold=None,
628
- chrom=None,
629
- start=None,
630
- end=None,
631
- gene_obj=None,
632
- custom_images=None,
633
- build="37",
614
+ case_obj: dict,
615
+ variant_type: str = "clinical",
616
+ category: str = "snv",
617
+ rank_threshold: float = None,
618
+ chrom: str = None,
619
+ start: int = None,
620
+ end: int = None,
621
+ gene_obj: dict = None,
622
+ custom_images: list = None,
623
+ build: str = "37",
634
624
  ):
635
625
  """Load variants for a case into scout.
636
626
 
@@ -657,30 +647,21 @@ class VariantLoader(object):
657
647
 
658
648
  nr_inserted = 0
659
649
 
660
- variant_files = []
650
+ gene_to_panels = self.gene_to_panels(case_obj)
651
+ genes = list(self.all_genes(build=build))
652
+ hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
653
+ genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
654
+
661
655
  for vcf_file_key, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
662
656
  if vcf_dict["variant_type"] != variant_type:
663
657
  continue
664
658
  if vcf_dict["category"] != category:
665
659
  continue
666
660
 
667
- LOG.debug("Attempt to load %s %s VCF.", variant_type, category.upper())
661
+ LOG.info(f"Loading'{vcf_file_key}' variants")
668
662
  variant_file = case_obj["vcf_files"].get(vcf_file_key)
669
- if variant_file:
670
- variant_files.append(variant_file)
671
663
 
672
- if not variant_files:
673
- raise SyntaxError(
674
- "VCF files for {} {} does not seem to exist".format(category, variant_type)
675
- )
676
-
677
- gene_to_panels = self.gene_to_panels(case_obj)
678
- genes = [gene_obj for gene_obj in self.all_genes(build=build)]
679
- hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
680
- genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
681
-
682
- for variant_file in variant_files:
683
- if not self._has_variants_in_file(variant_file):
664
+ if not variant_file or not self._has_variants_in_file(variant_file):
684
665
  continue
685
666
 
686
667
  vcf_obj = VCF(variant_file)
@@ -722,11 +703,13 @@ class VariantLoader(object):
722
703
  else:
723
704
  rank_threshold = rank_threshold or 0
724
705
 
725
- variants = vcf_obj(region)
706
+ nr_variants = sum(1 for _ in vcf_obj(region))
707
+ vcf_obj = VCF(variant_file)
726
708
 
727
709
  try:
728
710
  nr_inserted = self._load_variants(
729
- variants=variants,
711
+ variants=vcf_obj(region),
712
+ nr_variants=nr_variants,
730
713
  variant_type=variant_type,
731
714
  case_obj=case_obj,
732
715
  individual_positions=individual_positions,
scout/build/individual.py CHANGED
@@ -6,9 +6,12 @@ from scout.exceptions import PedigreeError
6
6
 
7
7
  log = logging.getLogger(__name__)
8
8
  BUILD_INDIVIDUAL_FILES = [
9
+ "assembly_alignment_path",
9
10
  "bam_file",
10
11
  "d4_file",
12
+ "minor_allele_frequency_wig",
11
13
  "mt_bam",
14
+ "paraphase_alignment_path",
12
15
  "rhocall_bed",
13
16
  "rhocall_wig",
14
17
  "rna_alignment_path",
@@ -39,9 +42,10 @@ def build_individual(ind: dict) -> dict:
39
42
  mother = str, # Individual id of mother
40
43
  capture_kits = list, # List of names of capture kits
41
44
  bam_file = str, # Path to bam file,
45
+ minor_allele_frequency_wig = str, # Path to a HiFiCNV MAF wig
42
46
  rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
43
47
  rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
44
- tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
48
+ tiddit_coverage_wig = str, # Path to a TIDDIT or HiFiCNV coverage wig - overview coverage
45
49
  upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
46
50
  upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
47
51
  vcf2cytosure = str, # Path to CGH file
@@ -1,5 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  import logging
3
+ from typing import List
3
4
 
4
5
  from scout.utils.convert import call_safe
5
6
  from scout.utils.dict_utils import remove_nonetype
@@ -115,6 +116,7 @@ def build_variant(
115
116
  revel_score = float, REVEL rankscore
116
117
  revel = float, REVEL score
117
118
  clnsig = list, # list of <clinsig>
119
+
118
120
  spidex = float,
119
121
 
120
122
  missing_data = bool, # default False
@@ -247,6 +249,7 @@ def build_variant(
247
249
  add_hgnc_symbols(variant_obj, variant_obj["hgnc_ids"], hgncid_to_gene)
248
250
  link_gene_panels(variant_obj, gene_to_panels)
249
251
  add_clnsig_objects(variant_obj, variant.get("clnsig", []))
252
+ add_clnsig_onc_objects(variant_obj, variant.get("clnsig_onc"))
250
253
 
251
254
  add_callers(variant_obj, variant.get("callers", {}))
252
255
 
@@ -337,6 +340,11 @@ def add_clnsig_objects(variant_obj, clnsig_list):
337
340
  variant_obj["clnsig"] = clnsig_objects
338
341
 
339
342
 
343
+ def add_clnsig_onc_objects(variant_obj: dict, onc_clnsig: List[dict]):
344
+ if onc_clnsig:
345
+ variant_obj["clnsig_onc"] = onc_clnsig
346
+
347
+
340
348
  def add_callers(variant_obj, call_info):
341
349
  """Add call_info to variant_obj
342
350
  Args: variant_obj (Dict)
@@ -6,11 +6,21 @@ from typing import List, Optional
6
6
 
7
7
  import click
8
8
 
9
- from scout.utils.ensembl_biomart_clients import EnsemblBiomartHandler
9
+ from scout.utils.ensembl_biomart_clients import CHROM_SEPARATOR, EnsemblBiomartHandler
10
+
11
+ NR_EXPECTED_CHROMS = 24
10
12
 
11
13
  LOG = logging.getLogger(__name__)
12
14
 
13
15
 
16
+ def integrity_check(nr_chromosomes_in_file: int):
17
+ if nr_chromosomes_in_file < NR_EXPECTED_CHROMS:
18
+ raise BufferError(
19
+ "Ensembl resource does not seem to be complete. Please retry downloading genes/transcripts."
20
+ )
21
+ LOG.info("Integrity check OK.")
22
+
23
+
14
24
  def print_ensembl(
15
25
  out_dir: pathlib.Path, resource_type: List[str], genome_build: Optional[str] = None
16
26
  ):
@@ -31,14 +41,19 @@ def print_ensembl(
31
41
 
32
42
  file_name: str = f"ensembl_{resource_type}_{build}.txt"
33
43
  file_path = out_dir / file_name
44
+ nr_chroms_in_file = 0
34
45
 
35
46
  LOG.info("Print ensembl info %s to %s", build, file_path)
36
47
 
37
48
  with file_path.open("w", encoding="utf-8") as outfile:
38
49
  for line in ensembl_client.stream_resource(interval_type=resource_type):
39
- outfile.write(line + "\n")
50
+ if line.strip() == CHROM_SEPARATOR:
51
+ nr_chroms_in_file += 1
52
+ else:
53
+ outfile.write(line + "\n")
40
54
 
41
- LOG.info(f"{file_name} file saved to disk")
55
+ LOG.info(f"{file_name} file saved to disk.")
56
+ integrity_check(nr_chroms_in_file)
42
57
 
43
58
 
44
59
  @click.command("ensembl", help="Download files with ensembl info")
@@ -10,6 +10,7 @@ from scout.adapter import MongoAdapter
10
10
  from scout.constants import ORDERED_FILE_TYPE_MAP
11
11
  from scout.server.extensions import store
12
12
 
13
+ DEFAULT_RANK_THRESHOLD = 8
13
14
  LOG = logging.getLogger(__name__)
14
15
 
15
16
 
@@ -23,7 +24,6 @@ def upload_research_variants(
23
24
  """Delete existing variants and upload new variants"""
24
25
  adapter.delete_variants(case_id=case_obj["_id"], variant_type=variant_type, category=category)
25
26
 
26
- LOG.info("Load %s %s for: %s", variant_type, category.upper(), case_obj["_id"])
27
27
  adapter.load_variants(
28
28
  case_obj=case_obj,
29
29
  variant_type=variant_type,
@@ -85,7 +85,6 @@ def research(case_id, institute, force):
85
85
  # Fetch all cases that have requested research
86
86
  case_objs = adapter.cases(research_requested=True)
87
87
 
88
- default_threshold = 8
89
88
  files = False
90
89
  raise_file_not_found = False
91
90
  for case_obj in case_objs:
@@ -107,7 +106,7 @@ def research(case_id, institute, force):
107
106
  case_obj=case_obj,
108
107
  variant_type="research",
109
108
  category=ORDERED_FILE_TYPE_MAP[file_type]["category"],
110
- rank_treshold=default_threshold,
109
+ rank_treshold=case_obj.get("rank_score_threshold", DEFAULT_RANK_THRESHOLD),
111
110
  )
112
111
 
113
112
  if not files:
@@ -7,6 +7,7 @@ import click
7
7
  from scout.server.extensions import store
8
8
 
9
9
  UPDATE_DICT = {
10
+ "assembly_alignment_path": "path",
10
11
  "bam_file": "path",
11
12
  "bionano_access.sample": "str",
12
13
  "bionano_access.project": "str",
@@ -15,7 +16,9 @@ UPDATE_DICT = {
15
16
  "chromograph_images.coverage": "str",
16
17
  "chromograph_images.upd_regions": "str",
17
18
  "chromograph_images.upd_sites": "str",
19
+ "minor_allele_frequency_wig": "path",
18
20
  "mt_bam": "path",
21
+ "paraphase_alignment_path": "path",
19
22
  "reviewer.alignment": "path",
20
23
  "reviewer.alignment_index": "path",
21
24
  "reviewer.vcf": "path",
@@ -5,6 +5,7 @@ import logging
5
5
  import click
6
6
  from flask.cli import current_app, with_appcontext
7
7
 
8
+ from scout.constants.panels import PANELAPPGREEN_DISPLAY_NAME, PANELAPPGREEN_NAME
8
9
  from scout.load.panelapp import load_panelapp_green_panel
9
10
  from scout.server.extensions import store
10
11
 
@@ -31,8 +32,15 @@ LOG = logging.getLogger(__name__)
31
32
  is_flag=True,
32
33
  help="Force update even if updated panel contains less genes",
33
34
  )
35
+ @click.option("--panel-id", help="Panel ID", default=PANELAPPGREEN_NAME, show_default=True)
36
+ @click.option(
37
+ "--panel-display-name",
38
+ help="Panel display name",
39
+ default=PANELAPPGREEN_DISPLAY_NAME,
40
+ show_default=True,
41
+ )
34
42
  @with_appcontext
35
- def panelapp_green(institute, force, signed_off):
43
+ def panelapp_green(institute, force, signed_off, panel_id, panel_display_name):
36
44
  """
37
45
  Update the automatically generated PanelApp Green Genes panel in the database.
38
46
  """
@@ -47,7 +55,12 @@ def panelapp_green(institute, force, signed_off):
47
55
 
48
56
  try:
49
57
  load_panelapp_green_panel(
50
- adapter=store, institute=institute, force=force, signed_off=signed_off
58
+ adapter=store,
59
+ institute=institute,
60
+ force=force,
61
+ signed_off=signed_off,
62
+ panel_id=panel_id,
63
+ panel_display_name=panel_display_name,
51
64
  )
52
65
  except Exception as err:
53
66
  LOG.error(err)
@@ -37,7 +37,7 @@ from .clinvar import (
37
37
  GERMLINE_CLASSIF_TERMS,
38
38
  MULTIPLE_CONDITION_EXPLANATION,
39
39
  )
40
- from .clnsig import CLINSIG_MAP, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
40
+ from .clnsig import CLINSIG_MAP, ONC_CLNSIG, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
41
41
  from .disease_parsing import (
42
42
  DISEASE_INHERITANCE_TERMS,
43
43
  ENTRY_PATTERN,
@@ -45,7 +45,11 @@ from .disease_parsing import (
45
45
  MIMNR_PATTERN,
46
46
  OMIM_STATUS_MAP,
47
47
  )
48
- from .file_types import ORDERED_FILE_TYPE_MAP, ORDERED_OMICS_FILE_TYPE_MAP
48
+ from .file_types import (
49
+ DNA_SAMPLE_VARIANT_CATEGORIES,
50
+ ORDERED_FILE_TYPE_MAP,
51
+ ORDERED_OMICS_FILE_TYPE_MAP,
52
+ )
49
53
  from .filters import (
50
54
  CLINICAL_FILTER_BASE,
51
55
  CLINICAL_FILTER_BASE_CANCER,
scout/constants/clnsig.py CHANGED
@@ -65,3 +65,5 @@ TRUSTED_REVSTAT_LEVEL = [
65
65
  "guideline",
66
66
  "practice_guideline",
67
67
  ]
68
+
69
+ ONC_CLNSIG = ["Oncogenic", "Likely oncogenic", "Uncertain significance", "Likely benign", "Benign"]