scout-browser 4.96.0__py3-none-any.whl → 4.97.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. scout/adapter/mongo/case.py +51 -47
  2. scout/adapter/mongo/filter.py +28 -11
  3. scout/adapter/mongo/institute.py +2 -0
  4. scout/adapter/mongo/omics_variant.py +20 -5
  5. scout/adapter/mongo/query.py +104 -95
  6. scout/adapter/mongo/variant.py +0 -5
  7. scout/adapter/mongo/variant_loader.py +10 -12
  8. scout/build/individual.py +3 -11
  9. scout/commands/delete/delete_command.py +87 -49
  10. scout/commands/load/research.py +4 -4
  11. scout/commands/load/variants.py +25 -8
  12. scout/commands/setup/setup_scout.py +1 -1
  13. scout/commands/update/case.py +12 -0
  14. scout/commands/update/individual.py +1 -2
  15. scout/constants/__init__.py +7 -2
  16. scout/constants/file_types.py +68 -119
  17. scout/constants/filters.py +2 -1
  18. scout/constants/gene_tags.py +3 -3
  19. scout/constants/igv_tracks.py +7 -11
  20. scout/constants/query_terms.py +2 -2
  21. scout/demo/643594.config.yaml +6 -0
  22. scout/demo/643594.peddy.ped +1 -1
  23. scout/demo/643594.somalier.ancestry.tsv +4 -0
  24. scout/demo/643594.somalier.pairs.tsv +4 -0
  25. scout/demo/643594.somalier.samples.tsv +4 -0
  26. scout/demo/cancer.load_config.yaml +1 -0
  27. scout/demo/resources/__init__.py +1 -1
  28. scout/demo/resources/gnomad.v4.1.constraint_metrics_reduced.tsv +3755 -0
  29. scout/exceptions/database.py +1 -1
  30. scout/load/all.py +8 -16
  31. scout/models/case/case.py +1 -0
  32. scout/models/case/case_loading_models.py +12 -5
  33. scout/models/managed_variant.py +3 -3
  34. scout/models/omics_variant.py +3 -3
  35. scout/parse/case.py +112 -5
  36. scout/parse/pedqc.py +127 -0
  37. scout/parse/variant/frequency.py +9 -6
  38. scout/parse/variant/variant.py +71 -39
  39. scout/server/app.py +2 -0
  40. scout/server/blueprints/alignviewers/controllers.py +2 -0
  41. scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +3 -0
  42. scout/server/blueprints/alignviewers/templates/alignviewers/utils.html +1 -1
  43. scout/server/blueprints/cases/controllers.py +23 -3
  44. scout/server/blueprints/cases/templates/cases/case.html +3 -0
  45. scout/server/blueprints/cases/templates/cases/chanjo2_form.html +2 -2
  46. scout/server/blueprints/cases/templates/cases/gene_panel.html +9 -3
  47. scout/server/blueprints/cases/templates/cases/individuals_table.html +4 -1
  48. scout/server/blueprints/cases/templates/cases/utils.html +23 -19
  49. scout/server/blueprints/cases/views.py +5 -9
  50. scout/server/blueprints/clinvar/controllers.py +11 -11
  51. scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +15 -7
  52. scout/server/blueprints/institutes/controllers.py +20 -1
  53. scout/server/blueprints/institutes/forms.py +5 -1
  54. scout/server/blueprints/institutes/templates/overview/institute_settings.html +7 -0
  55. scout/server/blueprints/institutes/templates/overview/utils.html +20 -1
  56. scout/server/blueprints/omics_variants/templates/omics_variants/outliers.html +9 -2
  57. scout/server/blueprints/omics_variants/views.py +8 -10
  58. scout/server/blueprints/variant/controllers.py +30 -1
  59. scout/server/blueprints/variant/templates/variant/cancer-variant.html +19 -3
  60. scout/server/blueprints/variant/templates/variant/components.html +26 -9
  61. scout/server/blueprints/variant/templates/variant/variant.html +4 -2
  62. scout/server/blueprints/variant/utils.py +2 -0
  63. scout/server/blueprints/variants/controllers.py +29 -3
  64. scout/server/blueprints/variants/forms.py +37 -10
  65. scout/server/blueprints/variants/templates/variants/components.html +12 -10
  66. scout/server/blueprints/variants/templates/variants/utils.html +59 -36
  67. scout/server/blueprints/variants/views.py +45 -60
  68. scout/server/extensions/beacon_extension.py +1 -1
  69. scout/server/extensions/bionano_extension.py +5 -5
  70. scout/server/extensions/chanjo2_extension.py +40 -1
  71. scout/server/extensions/chanjo_extension.py +1 -1
  72. scout/server/extensions/matchmaker_extension.py +1 -1
  73. scout/server/static/bs_styles.css +2 -0
  74. scout/server/templates/layout.html +1 -0
  75. scout/server/utils.py +5 -0
  76. scout/utils/ensembl_biomart_clients.py +2 -11
  77. scout/utils/scout_requests.py +1 -1
  78. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/METADATA +1 -1
  79. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/RECORD +82 -80
  80. scout/demo/resources/gnomad.v4.0.constraint_metrics_reduced.tsv +0 -3755
  81. scout/parse/peddy.py +0 -149
  82. scout/utils/sort.py +0 -21
  83. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/WHEEL +0 -0
  84. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/entry_points.txt +0 -0
  85. {scout_browser-4.96.0.dist-info → scout_browser-4.97.0.dist-info}/licenses/LICENSE +0 -0
@@ -3,6 +3,7 @@ import datetime
3
3
  import logging
4
4
  import operator
5
5
  import re
6
+ from collections import OrderedDict
6
7
  from copy import deepcopy
7
8
  from typing import Any, Dict, List, Optional
8
9
 
@@ -11,11 +12,16 @@ from bson import ObjectId
11
12
  from werkzeug.datastructures import ImmutableMultiDict
12
13
 
13
14
  from scout.build.case import build_case
14
- from scout.constants import ACMG_MAP, CCV_MAP, FILE_TYPE_MAP, ID_PROJECTION, OMICS_FILE_TYPE_MAP
15
+ from scout.constants import (
16
+ ACMG_MAP,
17
+ CCV_MAP,
18
+ ID_PROJECTION,
19
+ ORDERED_FILE_TYPE_MAP,
20
+ ORDERED_OMICS_FILE_TYPE_MAP,
21
+ )
15
22
  from scout.exceptions import ConfigError, IntegrityError
16
23
  from scout.parse.variant.ids import parse_document_id
17
24
  from scout.utils.algorithms import ui_score
18
- from scout.utils.sort import get_load_priority
19
25
 
20
26
  LOG = logging.getLogger(__name__)
21
27
  EXISTS = "$exists"
@@ -886,11 +892,17 @@ class CaseHandler(object):
886
892
  if key in old_case:
887
893
  new_case[key] = old_case[key]
888
894
 
889
- def _load_omics_variants(self, case_obj: dict, build: str, update: bool = False):
895
+ def _load_clinical_omics_variants(self, case_obj: dict, build: str, update: bool = False):
890
896
  """Load omics variants. The OMICS FILE type dict contains all we need to
891
897
  determine how to load variants (type, category etc)."""
892
898
 
893
- for omics_file in OMICS_FILE_TYPE_MAP.keys():
899
+ CLINICAL_ORDERED_OMICS_FILE_TYPE_MAP = OrderedDict(
900
+ (key, value)
901
+ for key, value in ORDERED_OMICS_FILE_TYPE_MAP.items()
902
+ if value["variant_type"] != "research"
903
+ )
904
+
905
+ for omics_file in CLINICAL_ORDERED_OMICS_FILE_TYPE_MAP.keys():
894
906
  if not case_obj["omics_files"].get(omics_file):
895
907
  LOG.debug("didn't find %s for case, skipping", omics_file)
896
908
  continue
@@ -901,6 +913,38 @@ class CaseHandler(object):
901
913
 
902
914
  self.load_omics_variants(case_obj=case_obj, build=build, file_type=omics_file)
903
915
 
916
+ def _load_clinical_variants(self, case_obj: dict, build: str, update: bool = False):
917
+ """Load variants in the order specified by CLINICAL_ORDERED_FILE_TYPE_MAP."""
918
+ CLINICAL_ORDERED_FILE_TYPE_MAP = OrderedDict(
919
+ (key, value)
920
+ for key, value in ORDERED_FILE_TYPE_MAP.items()
921
+ if value["variant_type"] != "research"
922
+ )
923
+ load_type_cat = set()
924
+ for file_name, vcf_dict in CLINICAL_ORDERED_FILE_TYPE_MAP.items():
925
+ if not case_obj["vcf_files"].get(file_name):
926
+ LOG.debug("didn't find {}, skipping".format(file_name))
927
+ continue
928
+ load_type_cat.add((vcf_dict["variant_type"], vcf_dict["category"]))
929
+
930
+ for variant_type, category in load_type_cat:
931
+ if update:
932
+ self.delete_variants(
933
+ case_id=case_obj["_id"],
934
+ variant_type=variant_type,
935
+ category=category,
936
+ )
937
+ self.load_variants(
938
+ case_obj=case_obj,
939
+ variant_type=variant_type,
940
+ category=category,
941
+ build=build,
942
+ rank_threshold=case_obj.get("rank_score_threshold", 5),
943
+ custom_images=self._get_variants_custom_images(
944
+ variant_category=category, case=case_obj
945
+ ),
946
+ )
947
+
904
948
  def load_case(self, config_data: dict, update: bool = False, keep_actions: bool = True) -> dict:
905
949
  """Load a case into the database
906
950
 
@@ -950,50 +994,9 @@ class CaseHandler(object):
950
994
  old_evaluated_variants = list(
951
995
  self.evaluated_variants(case_obj["_id"], case_obj["owner"])
952
996
  )
953
-
954
- # load from files
955
- files = [
956
- {
957
- "file_name": file_type,
958
- "variant_type": FILE_TYPE_MAP[file_type]["variant_type"],
959
- "category": FILE_TYPE_MAP[file_type]["category"],
960
- }
961
- for file_type in FILE_TYPE_MAP.keys()
962
- if FILE_TYPE_MAP[file_type]["variant_type"] != "research"
963
- ]
964
-
965
- # (type, category) tuples are not unique - eg SNV, SNV_MT
966
- load_variants = set()
967
997
  try:
968
- for vcf_file in files:
969
- # Check if any file of this kind is configured for case
970
- if not case_obj["vcf_files"].get(vcf_file["file_name"]):
971
- LOG.debug("didn't find {}, skipping".format(vcf_file["file_name"]))
972
- continue
973
- load_variants.add((vcf_file["variant_type"], vcf_file["category"]))
974
-
975
- for variant_type, category in sorted(
976
- load_variants,
977
- key=lambda tup: get_load_priority(variant_type=tup[0], category=tup[1]),
978
- ):
979
- if update:
980
- self.delete_variants(
981
- case_id=case_obj["_id"],
982
- variant_type=variant_type,
983
- category=category,
984
- )
985
- self.load_variants(
986
- case_obj=case_obj,
987
- variant_type=variant_type,
988
- category=category,
989
- build=genome_build,
990
- rank_threshold=case_obj.get("rank_score_threshold", 5),
991
- custom_images=self._get_variants_custom_images(
992
- variant_category=category, case=case_obj
993
- ),
994
- )
995
-
996
- self._load_omics_variants(case_obj, build=genome_build, update=update)
998
+ self._load_clinical_variants(case_obj, build=genome_build, update=update)
999
+ self._load_clinical_omics_variants(case_obj, build=genome_build, update=update)
997
1000
 
998
1001
  except (IntegrityError, ValueError, ConfigError, KeyError) as error:
999
1002
  LOG.exception(error)
@@ -1087,6 +1090,7 @@ class CaseHandler(object):
1087
1090
  Returns:
1088
1091
  updated_case(dict): The updated case information
1089
1092
  """
1093
+
1090
1094
  LOG.info("Updating case {0}".format(case_obj["_id"]))
1091
1095
  old_case = self.case_collection.find_one({"_id": case_obj["_id"]})
1092
1096
 
@@ -24,10 +24,10 @@ class FilterHandler(object):
24
24
  Returns:
25
25
  filter_obj(dict)
26
26
  """
27
- filter_obj = None
28
- LOG.debug("Retrieve filter {}".format(filter_id))
29
27
  filter_obj = self.filter_collection.find_one({"_id": ObjectId(filter_id)})
28
+
30
29
  if filter_obj is not None:
30
+ self.set_legacy_options(filter_obj)
31
31
  # use _id to preselect the currently loaded filter, and drop it while we are at it
32
32
  filter_obj.update([("filters", filter_obj.pop("_id", None))])
33
33
  return filter_obj
@@ -49,15 +49,6 @@ class FilterHandler(object):
49
49
  Returns:
50
50
  filter_id(str) - a unique id that can be cast to ObjectId
51
51
  """
52
-
53
- LOG.info(
54
- "Stashing filter for user '%s' and institute %s.",
55
- user_obj.get("email"),
56
- institute_obj.get("display_name"),
57
- )
58
-
59
- LOG.info("Filter object {}".format(filter_obj))
60
-
61
52
  institute_id = institute_obj.get("_id")
62
53
  filter_dict = {"institute_id": institute_id, "category": category}
63
54
 
@@ -271,3 +262,29 @@ class FilterHandler(object):
271
262
  )
272
263
 
273
264
  return filters_res
265
+
266
+ def set_legacy_options(self, filter_obj):
267
+ """Update remaining legacy filter options,
268
+ i.e. filter controls that changed names or functionality.
269
+ In particular, clinsig_confident_always_returned was split into two different
270
+ options: clinvar_trusted_revstat and prioritise_clinvar.
271
+ """
272
+ if "clinsig_confident_always_returned" not in filter_obj:
273
+ return
274
+
275
+ filter_value = filter_obj.pop("clinsig_confident_always_returned", ["True"])
276
+ filter_obj["clinvar_trusted_revstat"] = filter_value
277
+ filter_obj["prioritise_clinvar"] = filter_value
278
+
279
+ self.filter_collection.find_one_and_update(
280
+ {"_id": filter_obj["_id"]},
281
+ {
282
+ "$set": {
283
+ "clinvar_trusted_revstat": filter_value,
284
+ "prioritise_clinvar": filter_value,
285
+ },
286
+ "$unset": {
287
+ "clinsig_confident_always_returned": "",
288
+ },
289
+ },
290
+ )
@@ -56,6 +56,7 @@ class InstituteHandler(object):
56
56
  check_show_all_vars: Optional[str] = None,
57
57
  clinvar_key: Optional[str] = None,
58
58
  clinvar_submitters: Optional[List[str]] = None,
59
+ soft_filters: Optional[dict] = None,
59
60
  ) -> Union[dict, str]:
60
61
  """Update the information for an institute."""
61
62
 
@@ -127,6 +128,7 @@ class InstituteHandler(object):
127
128
  "alamut_institution": alamut_institution,
128
129
  "clinvar_key": clinvar_key,
129
130
  "show_all_cases_status": show_all_cases_status,
131
+ "soft_filters": soft_filters,
130
132
  }
131
133
  for key, value in ADMIN_SETTINGS.items():
132
134
  if value not in [None, "", []]:
@@ -1,11 +1,14 @@
1
1
  import logging
2
2
  from typing import Dict, Optional
3
3
 
4
- from scout.constants import OMICS_FILE_TYPE_MAP
4
+ from pymongo import ASCENDING, DESCENDING
5
+
6
+ from scout.constants import ORDERED_OMICS_FILE_TYPE_MAP
5
7
  from scout.models.omics_variant import OmicsVariantLoader
6
8
  from scout.parse.omics_variant import parse_omics_file
7
9
 
8
10
  LOG = logging.getLogger(__name__)
11
+ SORT_ORDER = {"asc": ASCENDING, "desc": DESCENDING}
9
12
 
10
13
 
11
14
  class OmicsVariantHandler:
@@ -30,7 +33,7 @@ class OmicsVariantHandler:
30
33
 
31
34
  def delete_omics_variants(self, case_id: str, file_type: str):
32
35
  """Delete OMICS variants for a case"""
33
- omics_file_type = OMICS_FILE_TYPE_MAP.get(file_type)
36
+ omics_file_type = ORDERED_OMICS_FILE_TYPE_MAP.get(file_type)
34
37
  category = omics_file_type["category"]
35
38
  sub_category = omics_file_type["sub_category"]
36
39
  variant_type = omics_file_type["variant_type"]
@@ -123,7 +126,7 @@ class OmicsVariantHandler:
123
126
  case_panels = case_obj.get("panels", [])
124
127
  gene_to_panels = self.gene_to_panels(case_obj)
125
128
 
126
- omics_file_type: dict = OMICS_FILE_TYPE_MAP.get(file_type)
129
+ omics_file_type: dict = ORDERED_OMICS_FILE_TYPE_MAP.get(file_type)
127
130
 
128
131
  nr_inserted = 0
129
132
 
@@ -180,9 +183,21 @@ class OmicsVariantHandler:
180
183
  else:
181
184
  nr_of_variants = skip + nr_of_variants
182
185
 
183
- query = self.build_query(case_id, query=query, category=category, build=build)
186
+ variants_query = self.build_query(case_id, query=query, category=category, build=build)
187
+
188
+ if query.get("sort_by") and query.get("sort_order"):
189
+ return (
190
+ self.omics_variant_collection.find(variants_query, projection)
191
+ .sort([(query.get("sort_by"), SORT_ORDER[query.get("sort_order")])])
192
+ .skip(skip)
193
+ .limit(nr_of_variants)
194
+ )
195
+
184
196
  return self.omics_variant_collection.find(
185
- query, projection, skip=skip, limit=nr_of_variants
197
+ variants_query,
198
+ projection,
199
+ skip=skip,
200
+ limit=nr_of_variants,
186
201
  )
187
202
 
188
203
  def count_omics_variants(
@@ -12,7 +12,12 @@ from scout.constants import (
12
12
  TRUSTED_REVSTAT_LEVEL,
13
13
  )
14
14
 
15
+ CLNSIG_NOT_EXISTS = {"clnsig": {"$exists": False}}
16
+ CLNSIG_NULL = {"clnsig": {"$eq": None}}
15
17
  CRITERION_EXCLUDE_OPERATOR = {False: "$in", True: "$nin"}
18
+ EXISTS = {"$exists": True}
19
+ NOT_EXISTS = {"$exists": False}
20
+ EXISTS_NOT_NULL = {"$exists": True, "$ne": None}
16
21
 
17
22
  LOG = logging.getLogger(__name__)
18
23
 
@@ -43,18 +48,16 @@ class QueryHandler(object):
43
48
  return case_query
44
49
 
45
50
  def delete_variants_query(
46
- self, case_id, variants_to_keep=[], min_rank_threshold=None, keep_ctg=[]
51
+ self,
52
+ case_id: str,
53
+ variants_to_keep: List[str] = [],
54
+ min_rank_threshold: Optional[int] = None,
55
+ keep_ctg: List[str] = [],
47
56
  ) -> dict:
48
- """Build a query to delete variants from a case
49
-
50
- Args:
51
- case_id(str): id of a case
52
- variants_to_keep(list): a list of variant ids
53
- min_rank_threshold(int): remove variants with rank lower than this number
54
- keep_ctg(list): exclude one of more variants categories from deletion. Example ["cancer", "cancer_sv"]
57
+ """Build a query to delete variants from a case (variant collection).
55
58
 
56
- Return:
57
- variant_query(dict): query dictionary
59
+ Removes variants with rank lower than `min_rank_threshold`.
60
+ Retains variants in categories `keep_ctg` by excluding them from deletion - eg `["cancer", "cancer_sv"]`.
58
61
  """
59
62
  variants_query = {}
60
63
  case_subquery = {"case_id": case_id}
@@ -202,14 +205,15 @@ class QueryHandler(object):
202
205
  'region_annotations': list,
203
206
  'functional_annotations': list,
204
207
  'clinsig': list,
205
- 'clinsig_confident_always_returned': boolean,
208
+ 'clinvar_trusted_revstat': boolean,
209
+ 'clinsig_exclude': bool,
206
210
  'variant_type': str(('research', 'clinical')),
207
211
  'chrom': str or list of str,
208
212
  'start': int,
209
213
  'end': int,
210
214
  'svtype': list,
211
215
  'size': int,
212
- 'size_shorter': boolean,
216
+ 'size_selector': str,
213
217
  'gene_panels': list(str),
214
218
  'mvl_tag': boolean,
215
219
  'clinvar_tag': boolean,
@@ -305,6 +309,9 @@ class QueryHandler(object):
305
309
  if criterion == "show_unaffected" and query.get(criterion) is False:
306
310
  self.affected_inds_query(mongo_query, case_id, gt_query)
307
311
 
312
+ if criterion == "show_soft_filtered" and query.get(criterion) is False:
313
+ self.soft_filters_query(query=query, mongo_query=mongo_query)
314
+
308
315
  ##### end of fundamental query params
309
316
 
310
317
  ##### start of the custom query params
@@ -319,18 +326,21 @@ class QueryHandler(object):
319
326
  for term in PRIMARY_CRITERIA:
320
327
  if query.get(term):
321
328
  primary_terms = True
329
+ break
322
330
 
323
331
  # check if any of the secondary criteria was specified in the query:
324
332
  for term in SECONDARY_CRITERIA:
325
333
  if query.get(term):
326
334
  secondary_terms = True
335
+ break
327
336
 
328
337
  if primary_terms is True:
329
- clinsign_filter = self.clinsig_query(query, mongo_query)
338
+ clinsign_filter: dict = self.set_and_get_clinsig_query(query, mongo_query)
330
339
 
331
340
  # Secondary, excluding filter criteria will hide variants in general,
332
341
  # but can be overridden by an including, major filter criteria
333
342
  # such as a Pathogenic ClinSig.
343
+
334
344
  if secondary_terms is True:
335
345
  secondary_filter = self.secondary_query(query, mongo_query)
336
346
  # If there are no primary criteria given, all secondary criteria are added as a
@@ -338,26 +348,35 @@ class QueryHandler(object):
338
348
  if secondary_filter and primary_terms is False:
339
349
  mongo_query["$and"] = secondary_filter
340
350
 
341
- # If there is only one primary criterion given without any secondary, it will also be
342
- # added as a top level '$and'.
343
- # Otherwise, primary criteria are added as a high level '$or' and all secondary criteria
344
- # are joined together with them as a single lower level '$and'.
345
- if primary_terms is True: # clinsig is specified
346
- # Given a request to always return confident clinical variants,
347
- # add the clnsig query as a major criteria, but only
348
- # trust clnsig entries with trusted revstat levels.
349
- if query.get("clinsig_confident_always_returned") is True:
351
+ # If the prioritise_clinvar checkbox is checked, the ClinVar significance filter (clinsign_filter) is applied as an alternative to the secondary_filter ("$or").
352
+ # This applies when the search for ClinVar-annotated variants should be more relaxed than the other filter constraints, for instance when applying the clinical filter.
353
+ if primary_terms is True:
354
+ if query.get("prioritise_clinvar") is True:
350
355
  mongo_query["$or"] = [
351
356
  {"$and": secondary_filter},
352
357
  clinsign_filter,
353
358
  ]
354
- else: # clisig terms are provided but no need for trusted revstat levels
359
+ else: # clinical_filter will be applied at the same level as the other secondary filters ("$and")
360
+ if query.get("clinsig_exclude"):
361
+ clinsign_filter = {
362
+ "$or": [
363
+ clinsign_filter,
364
+ CLNSIG_NOT_EXISTS,
365
+ CLNSIG_NULL,
366
+ ]
367
+ }
355
368
  secondary_filter.append(clinsign_filter)
356
369
  mongo_query["$and"] = secondary_filter
357
370
 
358
371
  elif primary_terms is True: # clisig is provided without secondary terms query
359
- # use implicit and
360
- mongo_query["clnsig"] = clinsign_filter["clnsig"]
372
+ if query.get("clinsig_exclude"):
373
+ mongo_query["$or"] = [
374
+ clinsign_filter,
375
+ CLNSIG_NOT_EXISTS,
376
+ CLNSIG_NULL,
377
+ ]
378
+ else:
379
+ mongo_query["clnsig"] = clinsign_filter["clnsig"]
361
380
 
362
381
  # if chromosome coordinates exist in query, add them as first element of the mongo_query['$and']
363
382
  if coordinate_query:
@@ -368,6 +387,11 @@ class QueryHandler(object):
368
387
 
369
388
  return mongo_query
370
389
 
390
+ def soft_filters_query(self, query: dict, mongo_query: dict):
391
+ """Adds info to variants query to exclude variants flagged by specific filters."""
392
+ if query.get("institute_soft_filters"):
393
+ mongo_query["filters"] = {"$nin": query["institute_soft_filters"].split(",")}
394
+
371
395
  def affected_inds_query(self, mongo_query, case_id, gt_query):
372
396
  """Add info to variants query to filter out variants which are only in unaffected individuals
373
397
 
@@ -393,7 +417,10 @@ class QueryHandler(object):
393
417
  for ind in case_inds:
394
418
  if ind["phenotype"] in [1, "unaffected"]: # 1=unaffected, 2=affected
395
419
  continue
396
- affected_match = {"sample_id": ind["individual_id"], "genotype_call": gt_query}
420
+ affected_match = {
421
+ "sample_id": ind["individual_id"],
422
+ "genotype_call": gt_query,
423
+ }
397
424
  affected_query["$elemMatch"]["$or"].append(affected_match)
398
425
 
399
426
  if affected_query["$elemMatch"][
@@ -401,65 +428,52 @@ class QueryHandler(object):
401
428
  ]: # Consider situation where all individuals are unaffected
402
429
  mongo_query["samples"] = affected_query
403
430
 
404
- def clinsig_query(self, query, mongo_query):
405
- """Add clinsig filter values to the mongo query object
431
+ def set_and_get_clinsig_query(self, query: dict, mongo_query: dict) -> dict:
432
+ """Add clinsig filter values to the mongo query object. If clinvar_tag exists in query then only results with ClinVar annotation are returned."""
406
433
 
407
- Args:
408
- query(dict): a dictionary of query filters specified by the users
409
- mongo_query(dict): the query that is going to be submitted to the database
434
+ clnsig_query = {"clnsig": {}}
410
435
 
411
- Returns:
412
- clinsig_query(dict): a dictionary with clinsig key-values
436
+ if query.get("clinsig"): # If any ClinVar significance was selected in the form multiselect
437
+ rank = []
438
+ str_rank = []
439
+ for item in query["clinsig"]:
440
+ rank.append(int(item))
441
+ # search for human readable clinsig values in newer cases
442
+ rank.append(CLINSIG_MAP[int(item)])
443
+ str_rank.append(CLINSIG_MAP[int(item)])
413
444
 
414
- """
415
- LOG.debug("clinsig is a query parameter")
416
- trusted_revision_level = TRUSTED_REVSTAT_LEVEL
417
- rank = []
418
- str_rank = []
419
- clnsig_query = {}
420
-
421
- for item in query["clinsig"]:
422
- rank.append(int(item))
423
- # search for human readable clinsig values in newer cases
424
- rank.append(CLINSIG_MAP[int(item)])
425
- str_rank.append(CLINSIG_MAP[int(item)])
426
-
427
- if query.get("clinsig_confident_always_returned") is True:
428
- LOG.debug("add CLINSIG filter with trusted_revision_level")
429
-
430
- clnsig_query = {
431
- "clnsig": {
445
+ elem_match = [
446
+ {"value": {"$in": rank}},
447
+ {"value": re.compile("|".join(str_rank))},
448
+ ]
449
+
450
+ if query.get("clinsig_exclude"):
451
+ elem_match_or = {"$nor": elem_match}
452
+ else:
453
+ elem_match_or = {"$or": elem_match}
454
+
455
+ if query.get("clinvar_trusted_revstat") is True:
456
+ clnsig_query["clnsig"] = {
432
457
  "$elemMatch": {
433
458
  "$and": [
434
- {
435
- "$or": [
436
- {"value": {"$in": rank}},
437
- {"value": re.compile("|".join(str_rank))},
438
- ]
439
- },
440
- {"revstat": re.compile("|".join(trusted_revision_level))},
459
+ elem_match_or,
460
+ {"revstat": re.compile("|".join(TRUSTED_REVSTAT_LEVEL))},
441
461
  ]
442
462
  }
443
463
  }
444
- }
445
- else:
446
- LOG.debug("add CLINSIG filter for rank: %s" % ", ".join(str(query["clinsig"])))
464
+ else:
465
+ clnsig_query["clnsig"] = {"$elemMatch": elem_match_or}
466
+
467
+ if query.get("clinvar_tag"):
468
+ mongo_query["clnsig"] = EXISTS_NOT_NULL # Used when query has secondary terms
469
+ clnsig_query["clnsig"]["$exists"] = True
470
+ clnsig_query["clnsig"]["$ne"] = None
447
471
 
448
- clnsig_query = {
449
- "clnsig": {
450
- "$elemMatch": {
451
- "$or": [
452
- {"value": {"$in": rank}},
453
- {"value": re.compile("|".join(str_rank))},
454
- ]
455
- }
456
- }
457
- }
458
472
  return clnsig_query
459
473
 
460
474
  def coordinate_filter(self, query, mongo_query):
461
475
  """Adds genomic coordinated-related filters to the query object
462
- This method is called to buid coordinate query for non-sv variants
476
+ This method is called to build coordinate query for non-sv variants
463
477
 
464
478
  Args:
465
479
  query(dict): a dictionary of query filters specified by the users
@@ -618,7 +632,7 @@ class QueryHandler(object):
618
632
  {
619
633
  "$or": [
620
634
  {"gnomad_frequency": {"$lt": float(gnomad)}},
621
- {"gnomad_frequency": {"$exists": False}},
635
+ {"gnomad_frequency": NOT_EXISTS},
622
636
  ]
623
637
  }
624
638
  )
@@ -656,7 +670,7 @@ class QueryHandler(object):
656
670
  {
657
671
  "$or": [
658
672
  {"swegen_mei_max": {"$lt": float(swegen)}},
659
- {"swegen_mei_max": {"$exists": False}},
673
+ {"swegen_mei_max": NOT_EXISTS},
660
674
  ]
661
675
  }
662
676
  )
@@ -665,7 +679,7 @@ class QueryHandler(object):
665
679
  mongo_secondary_query.append(
666
680
  {
667
681
  "$or": [
668
- {criterion: {"$exists": False}},
682
+ {criterion: NOT_EXISTS},
669
683
  {criterion: {"$lt": query[criterion] + 1}},
670
684
  ]
671
685
  }
@@ -677,7 +691,7 @@ class QueryHandler(object):
677
691
 
678
692
  spidex_query_or_part = []
679
693
  if "not_reported" in spidex_human:
680
- spidex_query_or_part.append({"spidex": {"$exists": False}})
694
+ spidex_query_or_part.append({"spidex": NOT_EXISTS})
681
695
 
682
696
  for spidex_level in SPIDEX_HUMAN:
683
697
  if spidex_level in spidex_human:
@@ -721,7 +735,7 @@ class QueryHandler(object):
721
735
  if criterion == "revel":
722
736
  revel = query["revel"]
723
737
  revel_query = {"revel": {"$gt": float(revel)}}
724
- revel_query = {"$or": [revel_query, {"revel": {"$exists": False}}]}
738
+ revel_query = {"$or": [revel_query, {"revel": NOT_EXISTS}]}
725
739
 
726
740
  mongo_secondary_query.append(revel_query)
727
741
 
@@ -729,7 +743,7 @@ class QueryHandler(object):
729
743
  rank_score_query = {
730
744
  "$or": [
731
745
  {"rank_score": {"$gte": float(query["rank_score"])}},
732
- {"rank_score": {"$exists": False}},
746
+ {"rank_score": NOT_EXISTS},
733
747
  ]
734
748
  }
735
749
  mongo_secondary_query.append(rank_score_query)
@@ -739,7 +753,7 @@ class QueryHandler(object):
739
753
  cadd_query = {"cadd_score": {"$gt": float(cadd)}}
740
754
 
741
755
  if query.get("cadd_inclusive") is True:
742
- cadd_query = {"$or": [cadd_query, {"cadd_score": {"$exists": False}}]}
756
+ cadd_query = {"$or": [cadd_query, {"cadd_score": NOT_EXISTS}]}
743
757
 
744
758
  mongo_secondary_query.append(cadd_query)
745
759
 
@@ -763,15 +777,14 @@ class QueryHandler(object):
763
777
 
764
778
  if criterion == "size":
765
779
  size = query["size"]
766
- size_query = {"length": {"$gt": int(size)}}
780
+ size_selector = query.get("size_selector")
767
781
 
768
- if query.get("size_shorter"):
769
- size_query = {
770
- "$or": [
771
- {"length": {"$lt": int(size)}},
772
- {"length": {"$exists": False}},
773
- ]
774
- }
782
+ size_query = {
783
+ "$or": [
784
+ {"$expr": {size_selector: [{"$abs": "$length"}, size]}},
785
+ {"length": NOT_EXISTS}, # Include documents where 'length' is missing
786
+ ]
787
+ }
775
788
 
776
789
  mongo_secondary_query.append(size_query)
777
790
 
@@ -780,7 +793,7 @@ class QueryHandler(object):
780
793
  mongo_secondary_query.append({"sub_category": {"$in": svtype}})
781
794
 
782
795
  if criterion == "decipher":
783
- mongo_query["decipher"] = {"$exists": True}
796
+ mongo_query["decipher"] = EXISTS
784
797
 
785
798
  if criterion == "depth":
786
799
  mongo_secondary_query.append({"tumor.read_depth": {"$gt": query.get("depth")}})
@@ -793,7 +806,7 @@ class QueryHandler(object):
793
806
  {
794
807
  "$or": [
795
808
  {"somatic_score": {"$gt": query.get("somatic_score")}},
796
- {"somatic_score": {"$exists": False}},
809
+ {"somatic_score": NOT_EXISTS},
797
810
  ]
798
811
  }
799
812
  )
@@ -809,14 +822,10 @@ class QueryHandler(object):
809
822
  )
810
823
 
811
824
  if criterion == "mvl_tag":
812
- mongo_secondary_query.append({"mvl_tag": {"$exists": True}})
813
-
814
- if criterion == "clinvar_tag":
815
- mongo_secondary_query.append({"clnsig": {"$exists": True}})
816
- mongo_secondary_query.append({"clnsig": {"$ne": None}})
825
+ mongo_secondary_query.append({"mvl_tag": EXISTS})
817
826
 
818
827
  if criterion == "cosmic_tag":
819
- mongo_secondary_query.append({"cosmic_ids": {"$exists": True}})
828
+ mongo_secondary_query.append({"cosmic_ids": EXISTS})
820
829
  mongo_secondary_query.append({"cosmic_ids": {"$ne": None}})
821
830
 
822
831
  if criterion == "fusion_score":
@@ -877,11 +877,6 @@ class VariantHandler(VariantLoader):
877
877
  }
878
878
  }
879
879
  """
880
- LOG.info(
881
- "Retrieving variants by category for case: {0}, institute: {1}".format(
882
- case_id, institute_id
883
- )
884
- )
885
880
 
886
881
  case_obj = self.case(case_id=case_id)
887
882
  variants_stats = case_obj.get("variants_stats") or {}