loqusdb 2.7.18__tar.gz → 2.7.20__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. {loqusdb-2.7.18 → loqusdb-2.7.20}/PKG-INFO +1 -1
  2. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/__init__.py +1 -30
  3. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/profile_variant.py +6 -4
  4. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/variant.py +36 -20
  5. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/annotate.py +3 -2
  6. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/cli.py +25 -2
  7. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/delete.py +7 -1
  8. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/export.py +14 -3
  9. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/load.py +2 -1
  10. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/load_profile.py +4 -3
  11. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/constants/__init__.py +60 -28
  12. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/annotate.py +4 -18
  13. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/delete.py +27 -5
  14. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/load.py +12 -6
  15. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/profiling.py +4 -4
  16. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/vcf.py +9 -9
  17. {loqusdb-2.7.18 → loqusdb-2.7.20}/pyproject.toml +1 -1
  18. loqusdb-2.7.20/tests/build_models/test_build_variant.py +30 -0
  19. loqusdb-2.7.20/tests/build_models/test_is_greater.py +98 -0
  20. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/conftest.py +67 -1
  21. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/functional/test_cli.py +1 -1
  22. loqusdb-2.7.20/tests/plugins/mongo/test_get_sv.py +62 -0
  23. loqusdb-2.7.20/tests/plugins/mongo/test_load_svs.py +164 -0
  24. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_variant_operations.py +65 -2
  25. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_delete.py +57 -5
  26. loqusdb-2.7.20/tests/utils/test_delete_variant.py +157 -0
  27. loqusdb-2.7.20/tests/utils/test_load_database.py +108 -0
  28. loqusdb-2.7.20/tests/utils/test_load_variants.py +398 -0
  29. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_profiling.py +1 -1
  30. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/vcf_tools/test_check_par.py +11 -10
  31. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/vcf_tools/test_check_vcf.py +1 -1
  32. loqusdb-2.7.20/tests/vcf_tools/test_format_sv_variant.py +244 -0
  33. loqusdb-2.7.20/tests/vcf_tools/test_format_variant.py +283 -0
  34. loqusdb-2.7.18/tests/build_models/test_build_variant.py +0 -15
  35. loqusdb-2.7.18/tests/build_models/test_is_greater.py +0 -49
  36. loqusdb-2.7.18/tests/plugins/mongo/test_get_sv.py +0 -27
  37. loqusdb-2.7.18/tests/plugins/mongo/test_load_svs.py +0 -74
  38. loqusdb-2.7.18/tests/utils/test_delete_variant.py +0 -74
  39. loqusdb-2.7.18/tests/utils/test_load_database.py +0 -52
  40. loqusdb-2.7.18/tests/utils/test_load_variants.py +0 -225
  41. loqusdb-2.7.18/tests/vcf_tools/test_format_sv_variant.py +0 -102
  42. loqusdb-2.7.18/tests/vcf_tools/test_format_variant.py +0 -113
  43. {loqusdb-2.7.18 → loqusdb-2.7.20}/LICENSE +0 -0
  44. {loqusdb-2.7.18 → loqusdb-2.7.20}/README.md +0 -0
  45. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/__main__.py +0 -0
  46. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/__init__.py +0 -0
  47. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/build_models/case.py +0 -0
  48. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/__init__.py +0 -0
  49. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/identity.py +0 -0
  50. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/migrate.py +0 -0
  51. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/restore.py +0 -0
  52. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/update.py +0 -0
  53. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/view.py +0 -0
  54. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/commands/wipe.py +0 -0
  55. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/__init__.py +0 -0
  56. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/case.py +0 -0
  57. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/profile.py +0 -0
  58. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/exceptions/vcf.py +0 -0
  59. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/log.py +0 -0
  60. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/__init__.py +0 -0
  61. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/case.py +0 -0
  62. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/identity.py +0 -0
  63. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/profile_variant.py +0 -0
  64. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/models/variant.py +0 -0
  65. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/__init__.py +0 -0
  66. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/__init__.py +0 -0
  67. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/adapter.py +0 -0
  68. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/case.py +0 -0
  69. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/profile_variant.py +0 -0
  70. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/structural_variant.py +0 -0
  71. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/plugins/mongo/variant.py +0 -0
  72. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/__init__.py +0 -0
  73. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/loqusdb.20181005.gz +0 -0
  74. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/maf_50_sites_GRCh37.vcf.gz +0 -0
  75. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/resources/maf_50_sites_GRCh38.vcf.gz +0 -0
  76. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/__init__.py +0 -0
  77. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/case.py +0 -0
  78. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/migrate.py +0 -0
  79. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/update.py +0 -0
  80. {loqusdb-2.7.18 → loqusdb-2.7.20}/loqusdb/utils/variant.py +0 -0
  81. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/build_models/test_build_case.py +0 -0
  82. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/commands/test_export.py +0 -0
  83. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/commands/test_identity.py +0 -0
  84. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/commands/test_view.py +0 -0
  85. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/643594.clinical.SV.vcf +0 -0
  86. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/643594.clinical.vcf.gz +0 -0
  87. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/double_variant.vcf +0 -0
  88. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/funny_trio.ped +0 -0
  89. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/profile_snv.vcf +0 -0
  90. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/recessive_trio.ped +0 -0
  91. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.SV.vcf +0 -0
  92. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.vcf +0 -0
  93. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.vcf.gz +0 -0
  94. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/test.vcf.gz.tbi +0 -0
  95. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/fixtures/unsorted.vcf +0 -0
  96. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_case_operations.py +0 -0
  97. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_connect.py +0 -0
  98. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/plugins/mongo/test_flask_extension.py +0 -0
  99. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_case.py +0 -0
  100. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_delete_family.py +0 -0
  101. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_get_family.py +0 -0
  102. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_load_family.py +0 -0
  103. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/utils/test_migrate.py +0 -0
  104. {loqusdb-2.7.18 → loqusdb-2.7.20}/tests/vcf_tools/test_vcf.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: loqusdb
3
- Version: 2.7.18
3
+ Version: 2.7.20
4
4
  Summary: A simple observation count database
5
5
  License: MIT
6
6
  Author: Your Name
@@ -4,7 +4,7 @@ from pymongo import ASCENDING, IndexModel
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
- __version__ = "2.7.18"
7
+ __version__ = "2.7.20"
8
8
 
9
9
  INDEXES = {
10
10
  "variant": [
@@ -61,32 +61,3 @@ INDEXES = {
61
61
  ),
62
62
  ],
63
63
  }
64
-
65
- CHROMOSOME_ORDER = (
66
- "1",
67
- "2",
68
- "3",
69
- "4",
70
- "5",
71
- "6",
72
- "7",
73
- "8",
74
- "9",
75
- "10",
76
- "11",
77
- "12",
78
- "13",
79
- "14",
80
- "15",
81
- "16",
82
- "17",
83
- "18",
84
- "19",
85
- "20",
86
- "21",
87
- "22",
88
- "23",
89
- "X",
90
- "Y",
91
- "MT",
92
- )
@@ -24,23 +24,25 @@ def get_maf(variant):
24
24
  return variant.INFO.get("MAF")
25
25
 
26
26
 
27
- def build_profile_variant(variant):
27
+ def build_profile_variant(variant, keep_chr_prefix=None):
28
28
  """Returns a ProfileVariant object
29
29
 
30
30
  Args:
31
31
  variant (cyvcf2.Variant)
32
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
32
33
 
33
34
  Returns:
34
35
  variant (models.ProfileVariant)
35
36
  """
36
37
 
37
38
  chrom = variant.CHROM
38
- if chrom.startswith(("chr", "CHR", "Chr")):
39
- chrom = chrom[3:]
39
+ if not keep_chr_prefix:
40
+ if chrom.startswith(("chr", "CHR", "Chr")):
41
+ chrom = chrom[3:]
40
42
 
41
43
  pos = int(variant.POS)
42
44
 
43
- variant_id = get_variant_id(variant)
45
+ variant_id = get_variant_id(variant, keep_chr_prefix)
44
46
 
45
47
  ref = variant.REF
46
48
  alt = variant.ALT[0]
@@ -15,49 +15,59 @@ Position = namedtuple("Position", "chrom pos")
15
15
  # These are coordinate for the pseudo autosomal regions in GRCh37
16
16
 
17
17
 
18
- def check_par(chrom, pos, genome_build=None):
18
+ def check_par(chrom, pos, genome_build):
19
19
  """Check if a coordinate is in the PAR region
20
20
 
21
21
  Args:
22
22
  chrom(str)
23
23
  pos(int)
24
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
24
25
 
25
26
  Returns:
26
27
  par(bool)
27
28
  """
28
- if genome_build is None:
29
- genome_build = GRCH37
30
29
  return any(
31
30
  pos >= interval[0] and pos <= interval[1] for interval in PAR[genome_build].get(chrom, [])
32
31
  )
33
32
 
34
33
 
35
- def get_variant_id(variant):
36
- """Get a variant id on the format chrom_pos_ref_alt"""
34
+ def get_variant_id(variant, keep_chr_prefix=None):
35
+ """Get a variant id on the format chrom_pos_ref_alt
36
+
37
+ Args:
38
+ variant (cyvcf2.Variant)
39
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
40
+
41
+ Returns:
42
+ variant (models.ProfileVariant)
43
+ """
44
+
37
45
  chrom = variant.CHROM
38
- if chrom.lower().startswith("chr"):
39
- chrom = chrom[3:]
46
+ if not keep_chr_prefix:
47
+ if chrom.lower().startswith("chr"):
48
+ chrom = chrom[3:]
40
49
  return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
41
50
 
42
51
 
43
- def is_greater(a, b):
52
+ def is_greater(a, b, genome_build):
44
53
  """Check if position a is greater than position b
45
54
  This will look at chromosome and position.
46
55
 
47
56
  For example a position where chrom = 2 and pos = 300 is greater than a position where
48
57
  chrom = 1 and pos = 1000
49
58
 
50
- If any of the chromosomes is outside [1-22,X,Y,MT] we can not say which is biggest.
59
+ If any of the chromosomes is outside [1-22,X,Y,MT] or [chr1-chr22,chrX,chrY,chrM] we can not say which is biggest.
51
60
 
52
61
  Args:
53
62
  a,b(Position)
63
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
54
64
 
55
65
  Returns:
56
66
  bool: True if a is greater than b
57
67
  """
58
68
 
59
- a_chrom = CHROM_TO_INT.get(a.chrom, 0)
60
- b_chrom = CHROM_TO_INT.get(b.chrom, 0)
69
+ a_chrom = CHROM_TO_INT[genome_build].get(a.chrom, 0)
70
+ b_chrom = CHROM_TO_INT[genome_build].get(b.chrom, 0)
61
71
 
62
72
  if a_chrom == 0 or b_chrom == 0:
63
73
  return False
@@ -68,11 +78,13 @@ def is_greater(a, b):
68
78
  return a_chrom == b_chrom and a.pos > b.pos
69
79
 
70
80
 
71
- def get_coords(variant):
81
+ def get_coords(variant, keep_chr_prefix, genome_build):
72
82
  """Returns a dictionary with position information
73
83
 
74
84
  Args:
75
85
  variant(cyvcf2.Variant)
86
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
87
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
76
88
 
77
89
  Returns:
78
90
  coordinates(dict)
@@ -86,8 +98,9 @@ def get_coords(variant):
86
98
  "end": None,
87
99
  }
88
100
  chrom = variant.CHROM
89
- if chrom.startswith(("chr", "CHR", "Chr")):
90
- chrom = chrom[3:]
101
+ if not keep_chr_prefix:
102
+ if chrom.startswith(("chr", "CHR", "Chr")):
103
+ chrom = chrom[3:]
91
104
  coordinates["chrom"] = chrom
92
105
  end_chrom = chrom
93
106
 
@@ -107,8 +120,9 @@ def get_coords(variant):
107
120
  if sv_type == "BND":
108
121
  other_coordinates = alt.strip("ATCGN").strip("[]").split(":")
109
122
  end_chrom = other_coordinates[0]
110
- if end_chrom.startswith(("chr", "CHR", "Chr")):
111
- end_chrom = end_chrom[3:]
123
+ if not keep_chr_prefix:
124
+ if end_chrom.startswith(("chr", "CHR", "Chr")):
125
+ end_chrom = end_chrom[3:]
112
126
 
113
127
  end = int(other_coordinates[1])
114
128
 
@@ -126,7 +140,7 @@ def get_coords(variant):
126
140
  end_position = Position(end_chrom, end)
127
141
 
128
142
  # If 'start' is greater than 'end', switch positions
129
- if is_greater(position, end_position):
143
+ if is_greater(position, end_position, genome_build=genome_build):
130
144
  end_chrom = position.chrom
131
145
  end = position.pos
132
146
 
@@ -148,6 +162,7 @@ def build_variant(
148
162
  case_id: Optional[str] = None,
149
163
  gq_threshold: Optional[int] = None,
150
164
  gq_qual: Optional[bool] = False,
165
+ keep_chr_prefix: Optional[bool] = False,
151
166
  ignore_gq_if_unset: Optional[bool] = False,
152
167
  genome_build: Optional[str] = None,
153
168
  ) -> Variant:
@@ -165,6 +180,7 @@ def build_variant(
165
180
  case_id(str): The case id
166
181
  gq_threshold(int): Genotype Quality threshold
167
182
  gq_qual(bool): Use variant.QUAL for quality instead of GQ
183
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
168
184
  ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
169
185
  genome_build(str): Genome build. Ex. GRCh37 or GRCh38
170
186
 
@@ -179,14 +195,14 @@ def build_variant(
179
195
  sv = True
180
196
 
181
197
  # chrom_pos_ref_alt
182
- variant_id = get_variant_id(variant)
198
+ variant_id = get_variant_id(variant, keep_chr_prefix)
183
199
 
184
200
  ref = variant.REF
185
201
  # ALT is an array in cyvcf2
186
202
  # We allways assume splitted and normalized VCFs
187
203
  alt = variant.ALT[0]
188
204
 
189
- coordinates = get_coords(variant)
205
+ coordinates = get_coords(variant, keep_chr_prefix, genome_build=genome_build)
190
206
  chrom = coordinates["chrom"]
191
207
  pos = coordinates["pos"]
192
208
 
@@ -224,7 +240,7 @@ def build_variant(
224
240
  # If variant in X or Y and individual is male,
225
241
  # we need to check hemizygosity
226
242
  if (
227
- chrom in ["X", "Y"]
243
+ chrom in ["X", "Y", "chrX", "chrY"]
228
244
  and ind_obj["sex"] == 1
229
245
  and not check_par(chrom, pos, genome_build=genome_build)
230
246
  ):
@@ -21,6 +21,7 @@ LOG = logging.getLogger(__name__)
21
21
  def annotate(ctx, variant_file, sv):
22
22
  """Annotate the variants in a VCF"""
23
23
  adapter = ctx.obj["adapter"]
24
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
24
25
 
25
26
  variant_path = os.path.abspath(variant_file)
26
27
 
@@ -40,9 +41,9 @@ def annotate(ctx, variant_file, sv):
40
41
  start_inserting = datetime.now()
41
42
 
42
43
  if sv:
43
- annotated_variants = annotate_svs(adapter, vcf_obj)
44
+ annotated_variants = annotate_svs(adapter, vcf_obj, keep_chr_prefix)
44
45
  else:
45
- annotated_variants = annotate_snvs(adapter, vcf_obj)
46
+ annotated_variants = annotate_snvs(adapter, vcf_obj, keep_chr_prefix)
46
47
  # try:
47
48
  for variant in annotated_variants:
48
49
  click.echo(str(variant).rstrip())
@@ -52,14 +52,35 @@ LOG = logging.getLogger(__name__)
52
52
  @click.option(
53
53
  "-g",
54
54
  "--genome-build",
55
+ default="GRCh37",
56
+ show_default=True,
55
57
  type=click.Choice([GRCH37, GRCH38]),
56
58
  help="Specify what genome build to use",
57
59
  )
60
+ @click.option(
61
+ "--keep-chr-prefix",
62
+ is_flag=True,
63
+ default=False,
64
+ show_default=True,
65
+ help="Retain the 'chr/Chr/CHR' prefix for chromosomes if it is present",
66
+ )
58
67
  @click.option("-v", "--verbose", is_flag=True)
59
68
  @click.version_option(__version__)
60
69
  @click.pass_context
61
70
  def cli(
62
- ctx, database, username, password, authdb, port, host, uri, verbose, config, test, genome_build
71
+ ctx,
72
+ database,
73
+ username,
74
+ password,
75
+ authdb,
76
+ port,
77
+ host,
78
+ uri,
79
+ verbose,
80
+ config,
81
+ test,
82
+ genome_build,
83
+ keep_chr_prefix,
63
84
  ):
64
85
  """loqusdb: manage a local variant count database."""
65
86
  loglevel = "INFO"
@@ -102,7 +123,8 @@ def cli(
102
123
 
103
124
  adapter = MongoAdapter(client, db_name=database)
104
125
 
105
- genome_build = genome_build or configs.get("genome_build") or GRCH37
126
+ genome_build = genome_build or configs.get("genome_build")
127
+ keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
106
128
 
107
129
  ctx.obj = {}
108
130
  ctx.obj["db"] = database
@@ -114,3 +136,4 @@ def cli(
114
136
  ctx.obj["adapter"] = adapter
115
137
  ctx.obj["version"] = __version__
116
138
  ctx.obj["genome_build"] = genome_build
139
+ ctx.obj["keep_chr_prefix"] = keep_chr_prefix
@@ -35,6 +35,7 @@ def delete(ctx, family_file, family_type, case_id):
35
35
  ctx.abort()
36
36
 
37
37
  adapter = ctx.obj["adapter"]
38
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
38
39
 
39
40
  # Get a ped_parser.Family object from family file
40
41
  family = None
@@ -59,7 +60,12 @@ def delete(ctx, family_file, family_type, case_id):
59
60
  genome_build = ctx.obj["genome_build"]
60
61
  start_deleting = datetime.now()
61
62
  try:
62
- delete_command(adapter=adapter, case_obj=existing_case, genome_build=genome_build)
63
+ delete_command(
64
+ adapter=adapter,
65
+ case_obj=existing_case,
66
+ genome_build=genome_build,
67
+ keep_chr_prefix=keep_chr_prefix,
68
+ )
63
69
  except (CaseError, IOError) as error:
64
70
  LOG.warning(error)
65
71
  ctx.abort()
@@ -2,7 +2,7 @@ import logging
2
2
  from datetime import datetime
3
3
 
4
4
  import click
5
- from loqusdb import CHROMOSOME_ORDER
5
+ from loqusdb.constants import CHROMOSOMES, GRCH37, GRCH38
6
6
  from loqusdb.utils.variant import format_variant
7
7
  from vcftoolbox import HeaderParser, print_headers, print_variant
8
8
 
@@ -43,11 +43,22 @@ def export(ctx, outfile, variant_type, freq):
43
43
  is_sv = variant_type == "sv"
44
44
  existing_chromosomes = set(adapter.get_chromosomes(sv=is_sv))
45
45
 
46
+ genome = ctx.obj["genome_build"]
47
+ chromosome_order = CHROMOSOMES[genome]
48
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
49
+
46
50
  ordered_chromosomes = []
47
- for chrom in CHROMOSOME_ORDER:
48
- if chrom in existing_chromosomes:
51
+ for chrom in chromosome_order:
52
+ if keep_chr_prefix and chrom in existing_chromosomes:
49
53
  ordered_chromosomes.append(chrom)
50
54
  existing_chromosomes.remove(chrom)
55
+ elif not keep_chr_prefix:
56
+ if genome == GRCH37 and chrom in existing_chromosomes:
57
+ ordered_chromosomes.append(chrom)
58
+ existing_chromosomes.remove(chrom)
59
+ elif genome == GRCH38 and chrom[3:] in existing_chromosomes:
60
+ ordered_chromosomes.append(chrom)
61
+ existing_chromosomes.remove(chrom)
51
62
  for chrom in existing_chromosomes:
52
63
  ordered_chromosomes.append(chrom)
53
64
 
@@ -148,7 +148,7 @@ def load(
148
148
 
149
149
  adapter = ctx.obj["adapter"]
150
150
  genome_build = ctx.obj["genome_build"]
151
-
151
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
152
152
  start_inserting = datetime.now()
153
153
 
154
154
  try:
@@ -162,6 +162,7 @@ def load(
162
162
  case_id=case_id,
163
163
  gq_threshold=gq_threshold,
164
164
  snv_gq_only=snv_gq_only,
165
+ keep_chr_prefix=keep_chr_prefix,
165
166
  qual_gq=qual_gq,
166
167
  max_window=max_window,
167
168
  profile_file=variant_profile_path,
@@ -60,13 +60,14 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
60
60
  """
61
61
 
62
62
  adapter = ctx.obj["adapter"]
63
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
63
64
 
64
65
  LOG.info("Running loqusdb profile")
65
66
 
66
67
  if check_vcf:
67
68
  LOG.info(f"Check if profile in {check_vcf} has match in database")
68
69
  vcf_file = check_vcf
69
- profiles = get_profiles(adapter, vcf_file)
70
+ profiles = get_profiles(adapter, vcf_file, keep_chr_prefix)
70
71
  duplicate = check_duplicates(adapter, profiles, profile_threshold)
71
72
 
72
73
  if duplicate is not None:
@@ -81,11 +82,11 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
81
82
  if variant_file is not None:
82
83
  vcf_path = variant_file
83
84
  LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
84
- load_profile_variants(adapter, vcf_path)
85
+ load_profile_variants(adapter, vcf_path, keep_chr_prefix)
85
86
 
86
87
  if update:
87
88
  LOG.info("Updates profiles in database")
88
- update_profiles(adapter)
89
+ update_profiles(adapter, keep_chr_prefix)
89
90
 
90
91
  if stats:
91
92
  LOG.info("Prints profile stats")
@@ -17,35 +17,67 @@ PAR = {
17
17
  GENOTYPE_MAP = {0: "hom_ref", 1: "het", 2: "no_call", 3: "hom_alt"}
18
18
 
19
19
  # To keep the order of chromosomes
20
- CHROMOSOMES = (
21
- "1",
22
- "2",
23
- "3",
24
- "4",
25
- "5",
26
- "6",
27
- "7",
28
- "8",
29
- "9",
30
- "10",
31
- "11",
32
- "12",
33
- "13",
34
- "14",
35
- "15",
36
- "16",
37
- "17",
38
- "18",
39
- "19",
40
- "20",
41
- "21",
42
- "22",
43
- "X",
44
- "Y",
45
- "MT",
46
- )
20
+ CHROMOSOMES = {
21
+ GRCH37: [
22
+ "1",
23
+ "2",
24
+ "3",
25
+ "4",
26
+ "5",
27
+ "6",
28
+ "7",
29
+ "8",
30
+ "9",
31
+ "10",
32
+ "11",
33
+ "12",
34
+ "13",
35
+ "14",
36
+ "15",
37
+ "16",
38
+ "17",
39
+ "18",
40
+ "19",
41
+ "20",
42
+ "21",
43
+ "22",
44
+ "X",
45
+ "Y",
46
+ "MT",
47
+ ],
48
+ GRCH38: [
49
+ "chr1",
50
+ "chr2",
51
+ "chr3",
52
+ "chr4",
53
+ "chr5",
54
+ "chr6",
55
+ "chr7",
56
+ "chr8",
57
+ "chr9",
58
+ "chr10",
59
+ "chr11",
60
+ "chr12",
61
+ "chr13",
62
+ "chr14",
63
+ "chr15",
64
+ "chr16",
65
+ "chr17",
66
+ "chr18",
67
+ "chr19",
68
+ "chr20",
69
+ "chr21",
70
+ "chr22",
71
+ "chrX",
72
+ "chrY",
73
+ "chrM",
74
+ ],
75
+ }
47
76
 
48
- CHROM_TO_INT = {chrom: i + 1 for i, chrom in enumerate(CHROMOSOMES)}
77
+ CHROM_TO_INT = {
78
+ build: {chrom: i + 1 for i, chrom in enumerate(chromosomes)}
79
+ for build, chromosomes in CHROMOSOMES.items()
80
+ }
49
81
 
50
82
  # Ranges of hamming distances to be checked when 'loqusdb profile --stats'
51
83
  # items: <range_name>: <range>
@@ -31,21 +31,7 @@ def annotate_variant(variant, var_obj=None):
31
31
  return variant
32
32
 
33
33
 
34
- def annotate_snv(adpter, variant):
35
- """Annotate an SNV/INDEL variant
36
-
37
- Args:
38
- adapter(loqusdb.plugin.adapter)
39
- variant(cyvcf2.Variant)
40
- """
41
- variant_id = get_variant_id(variant)
42
- variant_obj = adapter.get_variant(variant={"_id": variant_id})
43
-
44
- annotated_variant = annotated_variant(variant, variant_obj)
45
- return annotated_variant
46
-
47
-
48
- def annotate_svs(adapter, vcf_obj):
34
+ def annotate_svs(adapter, vcf_obj, keep_chr_prefix):
49
35
  """Annotate all SV variants in a VCF
50
36
 
51
37
  Args:
@@ -56,14 +42,14 @@ def annotate_svs(adapter, vcf_obj):
56
42
  variant(cyvcf2.Variant)
57
43
  """
58
44
  for nr_variants, variant in enumerate(vcf_obj, 1):
59
- variant_info = get_coords(variant)
45
+ variant_info = get_coords(variant, keep_chr_prefix)
60
46
  match = adapter.get_structural_variant(variant_info)
61
47
  if match:
62
48
  annotate_variant(variant, match)
63
49
  yield variant
64
50
 
65
51
 
66
- def annotate_snvs(adapter, vcf_obj):
52
+ def annotate_snvs(adapter, vcf_obj, keep_chr_prefix):
67
53
  """Annotate all variants in a VCF
68
54
 
69
55
  Args:
@@ -77,7 +63,7 @@ def annotate_snvs(adapter, vcf_obj):
77
63
 
78
64
  for nr_variants, variant in enumerate(vcf_obj, 1):
79
65
  # Add the variant to current batch
80
- variants[get_variant_id(variant)] = variant
66
+ variants[get_variant_id(variant, keep_chr_prefix)] = variant
81
67
  # If batch len == 1000 we annotate the batch
82
68
  if (nr_variants % 1000) == 0:
83
69
 
@@ -9,7 +9,9 @@ from loqusdb.build_models.variant import build_variant
9
9
  LOG = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def delete(adapter, case_obj, update=False, existing_case=False, genome_build=None):
12
+ def delete(
13
+ adapter, case_obj, keep_chr_prefix=None, update=False, existing_case=False, genome_build=None
14
+ ):
13
15
  """Delete a case and all of it's variants from the database.
14
16
 
15
17
  Args:
@@ -18,6 +20,8 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
18
20
  update(bool): If we are in the middle of an update
19
21
  existing_case(models.Case): If something failed during an update we need to revert
20
22
  to the original case
23
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
24
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
21
25
 
22
26
  """
23
27
  # This will overwrite the updated case with the previous one
@@ -36,7 +40,11 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
36
40
  if file_type == "vcf_path":
37
41
  LOG.info("deleting variants")
38
42
  delete_variants(
39
- adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, genome_build=genome_build
43
+ adapter=adapter,
44
+ vcf_obj=vcf_obj,
45
+ keep_chr_prefix=keep_chr_prefix,
46
+ case_obj=case_obj,
47
+ genome_build=genome_build,
40
48
  )
41
49
  elif file_type == "vcf_sv_path":
42
50
  LOG.info("deleting structural variants")
@@ -44,10 +52,14 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
44
52
  adapter=adapter,
45
53
  vcf_obj=vcf_obj,
46
54
  case_obj=case_obj,
55
+ keep_chr_prefix=keep_chr_prefix,
56
+ genome_build=genome_build,
47
57
  )
48
58
 
49
59
 
50
- def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None):
60
+ def delete_variants(
61
+ adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None, genome_build=None
62
+ ):
51
63
  """Delete variants for a case in the database
52
64
 
53
65
  Args:
@@ -55,6 +67,7 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
55
67
  vcf_obj(iterable(dict))
56
68
  ind_positions(dict)
57
69
  case_id(str)
70
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
58
71
 
59
72
  Returns:
60
73
  nr_deleted (int): Number of deleted variants
@@ -69,7 +82,11 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
69
82
  variant_list = []
70
83
  for variant in vcf_obj:
71
84
  formated_variant = build_variant(
72
- variant=variant, case_obj=case_obj, case_id=case_id, genome_build=genome_build
85
+ variant=variant,
86
+ case_obj=case_obj,
87
+ keep_chr_prefix=keep_chr_prefix,
88
+ case_id=case_id,
89
+ genome_build=genome_build,
73
90
  )
74
91
 
75
92
  if not formated_variant:
@@ -109,7 +126,9 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
109
126
  return nr_deleted
110
127
 
111
128
 
112
- def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
129
+ def delete_structural_variants(
130
+ adapter, vcf_obj, case_obj, genome_build, keep_chr_prefix=None, case_id=None
131
+ ):
113
132
  """Delete structural variants for a case in the database
114
133
 
115
134
  Args:
@@ -117,6 +136,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
117
136
  vcf_obj(iterable(dict))
118
137
  ind_positions(dict)
119
138
  case_id(str)
139
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
120
140
 
121
141
  Returns:
122
142
  nr_deleted (int): Number of deleted variants"""
@@ -133,6 +153,8 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
133
153
  variant=variant,
134
154
  case_obj=case_obj,
135
155
  case_id=case_id,
156
+ genome_build=genome_build,
157
+ keep_chr_prefix=keep_chr_prefix,
136
158
  )
137
159
 
138
160
  if not formated_variant: