loqusdb 2.7.18__py3-none-any.whl → 2.7.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loqusdb/__init__.py CHANGED
@@ -4,7 +4,7 @@ from pymongo import ASCENDING, IndexModel
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
- __version__ = "2.7.18"
7
+ __version__ = "2.7.19"
8
8
 
9
9
  INDEXES = {
10
10
  "variant": [
@@ -24,23 +24,25 @@ def get_maf(variant):
24
24
  return variant.INFO.get("MAF")
25
25
 
26
26
 
27
- def build_profile_variant(variant):
27
+ def build_profile_variant(variant, keep_chr_prefix=None):
28
28
  """Returns a ProfileVariant object
29
29
 
30
30
  Args:
31
31
  variant (cyvcf2.Variant)
32
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
32
33
 
33
34
  Returns:
34
35
  variant (models.ProfileVariant)
35
36
  """
36
37
 
37
38
  chrom = variant.CHROM
38
- if chrom.startswith(("chr", "CHR", "Chr")):
39
- chrom = chrom[3:]
39
+ if not keep_chr_prefix:
40
+ if chrom.startswith(("chr", "CHR", "Chr")):
41
+ chrom = chrom[3:]
40
42
 
41
43
  pos = int(variant.POS)
42
44
 
43
- variant_id = get_variant_id(variant)
45
+ variant_id = get_variant_id(variant, keep_chr_prefix)
44
46
 
45
47
  ref = variant.REF
46
48
  alt = variant.ALT[0]
@@ -32,11 +32,21 @@ def check_par(chrom, pos, genome_build=None):
32
32
  )
33
33
 
34
34
 
35
- def get_variant_id(variant):
36
- """Get a variant id on the format chrom_pos_ref_alt"""
35
+ def get_variant_id(variant, keep_chr_prefix=None):
36
+ """Get a variant id on the format chrom_pos_ref_alt
37
+
38
+ Args:
39
+ variant (cyvcf2.Variant)
40
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
41
+
42
+ Returns:
43
+ variant_id (str): Variant id on the format chrom_pos_ref_alt
44
+ """
45
+
37
46
  chrom = variant.CHROM
38
- if chrom.lower().startswith("chr"):
39
- chrom = chrom[3:]
47
+ if not keep_chr_prefix:
48
+ if chrom.lower().startswith("chr"):
49
+ chrom = chrom[3:]
40
50
  return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
41
51
 
42
52
 
@@ -68,11 +78,12 @@ def is_greater(a, b):
68
78
  return a_chrom == b_chrom and a.pos > b.pos
69
79
 
70
80
 
71
- def get_coords(variant):
81
+ def get_coords(variant, keep_chr_prefix):
72
82
  """Returns a dictionary with position information
73
83
 
74
84
  Args:
75
85
  variant(cyvcf2.Variant)
86
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
76
87
 
77
88
  Returns:
78
89
  coordinates(dict)
@@ -86,8 +97,9 @@ def get_coords(variant):
86
97
  "end": None,
87
98
  }
88
99
  chrom = variant.CHROM
89
- if chrom.startswith(("chr", "CHR", "Chr")):
90
- chrom = chrom[3:]
100
+ if not keep_chr_prefix:
101
+ if chrom.startswith(("chr", "CHR", "Chr")):
102
+ chrom = chrom[3:]
91
103
  coordinates["chrom"] = chrom
92
104
  end_chrom = chrom
93
105
 
@@ -107,8 +119,9 @@ def get_coords(variant):
107
119
  if sv_type == "BND":
108
120
  other_coordinates = alt.strip("ATCGN").strip("[]").split(":")
109
121
  end_chrom = other_coordinates[0]
110
- if end_chrom.startswith(("chr", "CHR", "Chr")):
111
- end_chrom = end_chrom[3:]
122
+ if not keep_chr_prefix:
123
+ if end_chrom.startswith(("chr", "CHR", "Chr")):
124
+ end_chrom = end_chrom[3:]
112
125
 
113
126
  end = int(other_coordinates[1])
114
127
 
@@ -148,6 +161,7 @@ def build_variant(
148
161
  case_id: Optional[str] = None,
149
162
  gq_threshold: Optional[int] = None,
150
163
  gq_qual: Optional[bool] = False,
164
+ keep_chr_prefix: Optional[bool] = False,
151
165
  ignore_gq_if_unset: Optional[bool] = False,
152
166
  genome_build: Optional[str] = None,
153
167
  ) -> Variant:
@@ -165,6 +179,7 @@ def build_variant(
165
179
  case_id(str): The case id
166
180
  gq_threshold(int): Genotype Quality threshold
167
181
  gq_qual(bool): Use variant.QUAL for quality instead of GQ
182
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
168
183
  ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
169
184
  genome_build(str): Genome build. Ex. GRCh37 or GRCh38
170
185
 
@@ -179,14 +194,14 @@ def build_variant(
179
194
  sv = True
180
195
 
181
196
  # chrom_pos_ref_alt
182
- variant_id = get_variant_id(variant)
197
+ variant_id = get_variant_id(variant, keep_chr_prefix)
183
198
 
184
199
  ref = variant.REF
185
200
  # ALT is an array in cyvcf2
186
201
  # We always assume split and normalized VCFs
187
202
  alt = variant.ALT[0]
188
203
 
189
- coordinates = get_coords(variant)
204
+ coordinates = get_coords(variant, keep_chr_prefix)
190
205
  chrom = coordinates["chrom"]
191
206
  pos = coordinates["pos"]
192
207
 
@@ -21,6 +21,7 @@ LOG = logging.getLogger(__name__)
21
21
  def annotate(ctx, variant_file, sv):
22
22
  """Annotate the variants in a VCF"""
23
23
  adapter = ctx.obj["adapter"]
24
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
24
25
 
25
26
  variant_path = os.path.abspath(variant_file)
26
27
 
@@ -40,9 +41,9 @@ def annotate(ctx, variant_file, sv):
40
41
  start_inserting = datetime.now()
41
42
 
42
43
  if sv:
43
- annotated_variants = annotate_svs(adapter, vcf_obj)
44
+ annotated_variants = annotate_svs(adapter, vcf_obj, keep_chr_prefix)
44
45
  else:
45
- annotated_variants = annotate_snvs(adapter, vcf_obj)
46
+ annotated_variants = annotate_snvs(adapter, vcf_obj, keep_chr_prefix)
46
47
  # try:
47
48
  for variant in annotated_variants:
48
49
  click.echo(str(variant).rstrip())
loqusdb/commands/cli.py CHANGED
@@ -55,11 +55,30 @@ LOG = logging.getLogger(__name__)
55
55
  type=click.Choice([GRCH37, GRCH38]),
56
56
  help="Specify what genome build to use",
57
57
  )
58
+ @click.option(
59
+ "--keep-chr-prefix",
60
+ is_flag=True,
61
+ default=False,
62
+ show_default=True,
63
+ help="Retain the 'chr/Chr/CHR' prefix for chromosomes if it is present",
64
+ )
58
65
  @click.option("-v", "--verbose", is_flag=True)
59
66
  @click.version_option(__version__)
60
67
  @click.pass_context
61
68
  def cli(
62
- ctx, database, username, password, authdb, port, host, uri, verbose, config, test, genome_build
69
+ ctx,
70
+ database,
71
+ username,
72
+ password,
73
+ authdb,
74
+ port,
75
+ host,
76
+ uri,
77
+ verbose,
78
+ config,
79
+ test,
80
+ genome_build,
81
+ keep_chr_prefix,
63
82
  ):
64
83
  """loqusdb: manage a local variant count database."""
65
84
  loglevel = "INFO"
@@ -103,6 +122,7 @@ def cli(
103
122
  adapter = MongoAdapter(client, db_name=database)
104
123
 
105
124
  genome_build = genome_build or configs.get("genome_build") or GRCH37
125
+ keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
106
126
 
107
127
  ctx.obj = {}
108
128
  ctx.obj["db"] = database
@@ -114,3 +134,4 @@ def cli(
114
134
  ctx.obj["adapter"] = adapter
115
135
  ctx.obj["version"] = __version__
116
136
  ctx.obj["genome_build"] = genome_build
137
+ ctx.obj["keep_chr_prefix"] = keep_chr_prefix
@@ -35,6 +35,7 @@ def delete(ctx, family_file, family_type, case_id):
35
35
  ctx.abort()
36
36
 
37
37
  adapter = ctx.obj["adapter"]
38
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
38
39
 
39
40
  # Get a ped_parser.Family object from family file
40
41
  family = None
@@ -59,7 +60,12 @@ def delete(ctx, family_file, family_type, case_id):
59
60
  genome_build = ctx.obj["genome_build"]
60
61
  start_deleting = datetime.now()
61
62
  try:
62
- delete_command(adapter=adapter, case_obj=existing_case, genome_build=genome_build)
63
+ delete_command(
64
+ adapter=adapter,
65
+ case_obj=existing_case,
66
+ genome_build=genome_build,
67
+ keep_chr_prefix=keep_chr_prefix,
68
+ )
63
69
  except (CaseError, IOError) as error:
64
70
  LOG.warning(error)
65
71
  ctx.abort()
loqusdb/commands/load.py CHANGED
@@ -148,7 +148,7 @@ def load(
148
148
 
149
149
  adapter = ctx.obj["adapter"]
150
150
  genome_build = ctx.obj["genome_build"]
151
-
151
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
152
152
  start_inserting = datetime.now()
153
153
 
154
154
  try:
@@ -162,6 +162,7 @@ def load(
162
162
  case_id=case_id,
163
163
  gq_threshold=gq_threshold,
164
164
  snv_gq_only=snv_gq_only,
165
+ keep_chr_prefix=keep_chr_prefix,
165
166
  qual_gq=qual_gq,
166
167
  max_window=max_window,
167
168
  profile_file=variant_profile_path,
@@ -60,13 +60,14 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
60
60
  """
61
61
 
62
62
  adapter = ctx.obj["adapter"]
63
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
63
64
 
64
65
  LOG.info("Running loqusdb profile")
65
66
 
66
67
  if check_vcf:
67
68
  LOG.info(f"Check if profile in {check_vcf} has match in database")
68
69
  vcf_file = check_vcf
69
- profiles = get_profiles(adapter, vcf_file)
70
+ profiles = get_profiles(adapter, vcf_file, keep_chr_prefix)
70
71
  duplicate = check_duplicates(adapter, profiles, profile_threshold)
71
72
 
72
73
  if duplicate is not None:
@@ -81,11 +82,11 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
81
82
  if variant_file is not None:
82
83
  vcf_path = variant_file
83
84
  LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
84
- load_profile_variants(adapter, vcf_path)
85
+ load_profile_variants(adapter, vcf_path, keep_chr_prefix)
85
86
 
86
87
  if update:
87
88
  LOG.info("Updates profiles in database")
88
- update_profiles(adapter)
89
+ update_profiles(adapter, keep_chr_prefix)
89
90
 
90
91
  if stats:
91
92
  LOG.info("Prints profile stats")
loqusdb/utils/annotate.py CHANGED
@@ -31,21 +31,7 @@ def annotate_variant(variant, var_obj=None):
31
31
  return variant
32
32
 
33
33
 
34
- def annotate_snv(adpter, variant):
35
- """Annotate an SNV/INDEL variant
36
-
37
- Args:
38
- adapter(loqusdb.plugin.adapter)
39
- variant(cyvcf2.Variant)
40
- """
41
- variant_id = get_variant_id(variant)
42
- variant_obj = adapter.get_variant(variant={"_id": variant_id})
43
-
44
- annotated_variant = annotated_variant(variant, variant_obj)
45
- return annotated_variant
46
-
47
-
48
- def annotate_svs(adapter, vcf_obj):
34
+ def annotate_svs(adapter, vcf_obj, keep_chr_prefix):
49
35
  """Annotate all SV variants in a VCF
50
36
 
51
37
  Args:
@@ -56,14 +42,14 @@ def annotate_svs(adapter, vcf_obj):
56
42
  variant(cyvcf2.Variant)
57
43
  """
58
44
  for nr_variants, variant in enumerate(vcf_obj, 1):
59
- variant_info = get_coords(variant)
45
+ variant_info = get_coords(variant, keep_chr_prefix)
60
46
  match = adapter.get_structural_variant(variant_info)
61
47
  if match:
62
48
  annotate_variant(variant, match)
63
49
  yield variant
64
50
 
65
51
 
66
- def annotate_snvs(adapter, vcf_obj):
52
+ def annotate_snvs(adapter, vcf_obj, keep_chr_prefix):
67
53
  """Annotate all variants in a VCF
68
54
 
69
55
  Args:
@@ -77,7 +63,7 @@ def annotate_snvs(adapter, vcf_obj):
77
63
 
78
64
  for nr_variants, variant in enumerate(vcf_obj, 1):
79
65
  # Add the variant to current batch
80
- variants[get_variant_id(variant)] = variant
66
+ variants[get_variant_id(variant, keep_chr_prefix)] = variant
81
67
  # If batch len == 1000 we annotate the batch
82
68
  if (nr_variants % 1000) == 0:
83
69
 
loqusdb/utils/delete.py CHANGED
@@ -9,7 +9,9 @@ from loqusdb.build_models.variant import build_variant
9
9
  LOG = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def delete(adapter, case_obj, update=False, existing_case=False, genome_build=None):
12
+ def delete(
13
+ adapter, case_obj, keep_chr_prefix=None, update=False, existing_case=False, genome_build=None
14
+ ):
13
15
  """Delete a case and all of it's variants from the database.
14
16
 
15
17
  Args:
@@ -18,6 +20,7 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
18
20
  update(bool): If we are in the middle of an update
19
21
  existing_case(models.Case): If something failed during an update we need to revert
20
22
  to the original case
23
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
21
24
 
22
25
  """
23
26
  # This will overwrite the updated case with the previous one
@@ -36,18 +39,22 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
36
39
  if file_type == "vcf_path":
37
40
  LOG.info("deleting variants")
38
41
  delete_variants(
39
- adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, genome_build=genome_build
42
+ adapter=adapter,
43
+ vcf_obj=vcf_obj,
44
+ keep_chr_prefix=keep_chr_prefix,
45
+ case_obj=case_obj,
46
+ genome_build=genome_build,
40
47
  )
41
48
  elif file_type == "vcf_sv_path":
42
49
  LOG.info("deleting structural variants")
43
50
  delete_structural_variants(
44
- adapter=adapter,
45
- vcf_obj=vcf_obj,
46
- case_obj=case_obj,
51
+ adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, keep_chr_prefix=keep_chr_prefix
47
52
  )
48
53
 
49
54
 
50
- def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None):
55
+ def delete_variants(
56
+ adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None, genome_build=None
57
+ ):
51
58
  """Delete variants for a case in the database
52
59
 
53
60
  Args:
@@ -69,7 +76,11 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
69
76
  variant_list = []
70
77
  for variant in vcf_obj:
71
78
  formated_variant = build_variant(
72
- variant=variant, case_obj=case_obj, case_id=case_id, genome_build=genome_build
79
+ variant=variant,
80
+ case_obj=case_obj,
81
+ keep_chr_prefix=keep_chr_prefix,
82
+ case_id=case_id,
83
+ genome_build=genome_build,
73
84
  )
74
85
 
75
86
  if not formated_variant:
@@ -109,7 +120,7 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
109
120
  return nr_deleted
110
121
 
111
122
 
112
- def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
123
+ def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None):
113
124
  """Delete structural variants for a case in the database
114
125
 
115
126
  Args:
@@ -130,9 +141,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
130
141
 
131
142
  for variant in vcf_obj:
132
143
  formated_variant = build_variant(
133
- variant=variant,
134
- case_obj=case_obj,
135
- case_id=case_id,
144
+ variant=variant, case_obj=case_obj, case_id=case_id, keep_chr_prefix=keep_chr_prefix
136
145
  )
137
146
 
138
147
  if not formated_variant:
loqusdb/utils/load.py CHANGED
@@ -32,6 +32,7 @@ def load_database(
32
32
  skip_case_id=False,
33
33
  gq_threshold=None,
34
34
  snv_gq_only=False,
35
+ keep_chr_prefix=False,
35
36
  qual_gq=False,
36
37
  case_id=None,
37
38
  max_window=3000,
@@ -51,6 +52,7 @@ def load_database(
51
52
  family_type(str): Format of family file
52
53
  skip_case_id(bool): If no case information should be added to variants
53
54
  gq_threshold(int): If only quality variants should be considered
55
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
54
56
  qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality
55
57
  case_id(str): If different case id than the one in family file should be used
56
58
  max_window(int): Specify the max size for sv windows
@@ -68,7 +70,7 @@ def load_database(
68
70
  nr_variants = None
69
71
  vcf_individuals = None
70
72
  if variant_file:
71
- vcf_info = check_vcf(variant_file)
73
+ vcf_info = check_vcf(variant_file, keep_chr_prefix)
72
74
  nr_variants = vcf_info["nr_variants"]
73
75
  variant_type = vcf_info["variant_type"]
74
76
  vcf_files.append(variant_file)
@@ -78,7 +80,7 @@ def load_database(
78
80
  nr_sv_variants = None
79
81
  sv_individuals = None
80
82
  if sv_file:
81
- vcf_info = check_vcf(sv_file, "sv")
83
+ vcf_info = check_vcf(sv_file, keep_chr_prefix, "sv")
82
84
  nr_sv_variants = vcf_info["nr_variants"]
83
85
  vcf_files.append(sv_file)
84
86
  sv_individuals = vcf_info["individuals"]
@@ -86,7 +88,7 @@ def load_database(
86
88
  profiles = None
87
89
  matches = None
88
90
  if profile_file:
89
- profiles = get_profiles(adapter, profile_file)
91
+ profiles = get_profiles(adapter, profile_file, keep_chr_prefix)
90
92
  ###Check if any profile already exists
91
93
  matches = profile_match(
92
94
  adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
@@ -152,6 +154,7 @@ def load_database(
152
154
  skip_case_id=skip_case_id,
153
155
  gq_threshold=gq_threshold if not snv_gq_only or variant_type == "snv" else None,
154
156
  qual_gq=qual_gq,
157
+ keep_chr_prefix=keep_chr_prefix,
155
158
  max_window=max_window,
156
159
  variant_type=variant_type,
157
160
  genome_build=genome_build,
@@ -200,6 +203,7 @@ def load_variants(
200
203
  skip_case_id=False,
201
204
  gq_threshold=None,
202
205
  qual_gq=False,
206
+ keep_chr_prefix=False,
203
207
  max_window=3000,
204
208
  variant_type="snv",
205
209
  genome_build=None,
@@ -213,6 +217,7 @@ def load_variants(
213
217
  case_obj(Case): dict with case information
214
218
  skip_case_id (bool): whether to include the case id on variant level
215
219
  or not
220
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
216
221
  gq_threshold(int)
217
222
  qual_gq(bool): whether to use QUAL instead of GQ
218
223
  max_window(int): Specify the max size for sv windows
@@ -242,6 +247,7 @@ def load_variants(
242
247
  case_id,
243
248
  gq_threshold,
244
249
  qual_gq,
250
+ keep_chr_prefix,
245
251
  ignore_gq_if_unset,
246
252
  genome_build=genome_build,
247
253
  )
@@ -263,7 +269,7 @@ def load_variants(
263
269
  return nr_inserted
264
270
 
265
271
 
266
- def load_profile_variants(adapter, variant_file):
272
+ def load_profile_variants(adapter, variant_file, keep_chr_prefix=None):
267
273
  """
268
274
 
269
275
  Loads variants used for profiling
@@ -275,7 +281,7 @@ def load_profile_variants(adapter, variant_file):
275
281
 
276
282
  """
277
283
 
278
- vcf_info = check_vcf(variant_file)
284
+ vcf_info = check_vcf(variant_file, keep_chr_prefix)
279
285
  variant_type = vcf_info["variant_type"]
280
286
 
281
287
  if variant_type != "snv":
@@ -284,5 +290,5 @@ def load_profile_variants(adapter, variant_file):
284
290
 
285
291
  vcf = get_vcf(variant_file)
286
292
 
287
- profile_variants = [build_profile_variant(variant) for variant in vcf]
293
+ profile_variants = [build_profile_variant(variant, keep_chr_prefix) for variant in vcf]
288
294
  adapter.add_profile_variants(profile_variants)
@@ -11,7 +11,7 @@ from .vcf import get_file_handle
11
11
  LOG = logging.getLogger(__name__)
12
12
 
13
13
 
14
- def get_profiles(adapter, vcf_file):
14
+ def get_profiles(adapter, vcf_file, keep_chr_prefix):
15
15
  """Given a vcf, get a profile string for each sample in the vcf
16
16
  based on the profile variants in the database
17
17
 
@@ -44,7 +44,7 @@ def get_profiles(adapter, vcf_file):
44
44
  found_variant = False
45
45
  for variant in vcf(region):
46
46
 
47
- variant_id = get_variant_id(variant)
47
+ variant_id = get_variant_id(variant, keep_chr_prefix)
48
48
 
49
49
  # If variant id i.e. chrom_pos_ref_alt matches
50
50
  if variant_id == profile_variant["_id"]:
@@ -183,7 +183,7 @@ def compare_profiles(profile1, profile2):
183
183
  return similarity_ratio
184
184
 
185
185
 
186
- def update_profiles(adapter):
186
+ def update_profiles(adapter, keep_chr_prefix):
187
187
  """
188
188
  For all cases having vcf_path, update the profile string for the samples
189
189
 
@@ -198,7 +198,7 @@ def update_profiles(adapter):
198
198
  # case with new profiled individuals.
199
199
  if case.get("profile_path"):
200
200
 
201
- profiles = get_profiles(adapter, case["profile_path"])
201
+ profiles = get_profiles(adapter, case["profile_path"], keep_chr_prefix)
202
202
  profiled_individuals = deepcopy(case["individuals"])
203
203
 
204
204
  for individual in profiled_individuals:
loqusdb/utils/vcf.py CHANGED
@@ -89,7 +89,7 @@ def check_sorting(previous_chrom, previous_pos, current_chrom, current_pos):
89
89
  pass
90
90
 
91
91
 
92
- def check_vcf(vcf_path, expected_type="snv"):
92
+ def check_vcf(vcf_path, keep_chr_prefix=None, expected_type="snv"):
93
93
  """Check if there are any problems with the vcf file
94
94
 
95
95
  Args:
@@ -113,7 +113,7 @@ def check_vcf(vcf_path, expected_type="snv"):
113
113
  previous_pos = None
114
114
  previous_chrom = None
115
115
 
116
- posititon_variants = set()
116
+ position_variants = set()
117
117
 
118
118
  nr_variants = 0
119
119
  for nr_variants, variant in enumerate(vcf, 1):
@@ -134,36 +134,36 @@ def check_vcf(vcf_path, expected_type="snv"):
134
134
  variant_id = "{0}_{1}".format(current_chrom, current_pos)
135
135
  # For SNVs we can create a proper variant id with chrom_pos_ref_alt
136
136
  if variant_type == "snv":
137
- variant_id = get_variant_id(variant)
137
+ variant_id = get_variant_id(variant, keep_chr_prefix)
138
138
 
139
139
  # Initiate variables
140
140
  if not previous_chrom:
141
141
  previous_chrom = current_chrom
142
142
  previous_pos = current_pos
143
- posititon_variants = {variant_id}
143
+ position_variants = {variant_id}
144
144
  continue
145
145
 
146
146
  # Update variables if new chromosome
147
147
  if current_chrom != previous_chrom:
148
148
  previous_chrom = current_chrom
149
149
  previous_pos = current_pos
150
- posititon_variants = {variant_id}
150
+ position_variants = {variant_id}
151
151
  continue
152
152
 
153
153
  if variant_type == "snv":
154
154
  # Check if variant is unique
155
155
  if current_pos == previous_pos:
156
- if variant_id in posititon_variants:
156
+ if variant_id in position_variants:
157
157
  raise VcfError("Variant {0} occurs several times" " in vcf".format(variant_id))
158
158
  else:
159
- posititon_variants.add(variant_id)
159
+ position_variants.add(variant_id)
160
160
  # Check if vcf is sorted
161
161
  else:
162
162
  if not current_pos >= previous_pos:
163
163
  raise VcfError("Vcf if not sorted in a correct way")
164
164
  previous_pos = current_pos
165
- # Reset posititon_variants since we are on a new position
166
- posititon_variants = {variant_id}
165
+ # Reset position_variants since we are on a new position
166
+ position_variants = {variant_id}
167
167
 
168
168
  if variant_type != expected_type:
169
169
  raise VcfError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: loqusdb
3
- Version: 2.7.18
3
+ Version: 2.7.19
4
4
  Summary: A simple observation count database
5
5
  License: MIT
6
6
  Author: Your Name
@@ -1,17 +1,17 @@
1
- loqusdb/__init__.py,sha256=r4jbFPCtsyXv8VehXkXxBx1HDPPeaidp1mmwpN_Ghrg,1688
1
+ loqusdb/__init__.py,sha256=Zs9AtDiQwuASVgXDU0xzuWv8RhaadjMaa9WD4D7BMVc,1688
2
2
  loqusdb/__main__.py,sha256=8FGKySAGaWSzAYMj6HRsxeyiME3V01Idt7HrmN7pSYY,397
3
3
  loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  loqusdb/build_models/case.py,sha256=AByutEYK2N3kS9JFvyZfPKNZdCpZHCSD0nNHAgaU1Cs,4127
5
- loqusdb/build_models/profile_variant.py,sha256=TbSxfVjESstS_FgbkOW4NQwMQVeTyhn9oc9yPZmDhzI,1021
6
- loqusdb/build_models/variant.py,sha256=Vee9FVwCC03wo6u2uI7qH9rgfevndBf364YSmE3Js2k,7271
5
+ loqusdb/build_models/profile_variant.py,sha256=WzWhxq4HNvf67IknyBWYnMHQzPMZ9eitw_so6lfOkPc,1166
6
+ loqusdb/build_models/variant.py,sha256=buIQr8GsNUBBtgf78a0n5I_GiMEogohSEQJibVUuM5Y,7815
7
7
  loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
8
- loqusdb/commands/annotate.py,sha256=748kImopE5WbaO1nuv3WUgIqezWFSsi7SBeWhOz26-s,1384
9
- loqusdb/commands/cli.py,sha256=wJD5S1BoCxtRTAobd1QtmQpzlngJg-mt1nsyD92fDD4,3176
10
- loqusdb/commands/delete.py,sha256=R6ysHKSMw1mmL4ZbktoUIKzdzDLQ3314YPYhIy1myic,1979
8
+ loqusdb/commands/annotate.py,sha256=MGU9EerKYsFx1lkyjQ6ZMUKYuShi0uSTPJCS0cyxq7U,1467
9
+ loqusdb/commands/cli.py,sha256=XRprLQaENiLdqXG_7ugCC9jTcG7Uh54_M0KZj1ERFaM,3542
10
+ loqusdb/commands/delete.py,sha256=BRtm6Uade3l97FBcKFNkiYjks84AhuXYo-2QD8E74A4,2120
11
11
  loqusdb/commands/export.py,sha256=HKoRzUo_BHNOdw_TcKUId9TTowi8VJVGqnuDlK-FqFE,3531
12
12
  loqusdb/commands/identity.py,sha256=KLA9c8e6cJFDxtqIa1G6zdHTHK1sz2b3v1Utdtik_4k,787
13
- loqusdb/commands/load.py,sha256=QMUSzdaCi1lVC9F4u0xkAJtVz4N_tMPh3Nu6Obl9ky4,4903
14
- loqusdb/commands/load_profile.py,sha256=cflCbF9f77_HCH8xPnN8zSSocvIffRMnC2LPE0j7Xq8,3336
13
+ loqusdb/commands/load.py,sha256=pHtjldblUM-HFFgcN5UtoaxGhYmo1yeexqGq4I427qk,4996
14
+ loqusdb/commands/load_profile.py,sha256=x-T2bzi2SL5kwZhY_3hHQCtGDLao1xkxj1pZaOnzs4U,3436
15
15
  loqusdb/commands/migrate.py,sha256=2C8YL-zVqnpnqg3JIyUr0rbVnb8-AGPVWNhicHnPKLo,667
16
16
  loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1492
17
17
  loqusdb/commands/update.py,sha256=zz3wueaJVqJ1FKact-rpY2az__5oa1LnZKf7mgqNGPk,3211
@@ -40,17 +40,17 @@ loqusdb/resources/loqusdb.20181005.gz,sha256=DI8CLI7fPnIAjM25Avraz-C7KQkOKsfnhgZ
40
40
  loqusdb/resources/maf_50_sites_GRCh37.vcf.gz,sha256=BoD1_xZ-Rr8DTWCMNlQGh7gz1K8FA-j2nC4jKn_eB2A,5260
41
41
  loqusdb/resources/maf_50_sites_GRCh38.vcf.gz,sha256=6T4iyrIr6yx1HpgobzAsh305BO1JX0oGj48nFiYt2QM,9037
42
42
  loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
- loqusdb/utils/annotate.py,sha256=cPNWlhsv6yoe3lxNfa9DytO5eACuM_mOJJw_mglVMN0,2646
43
+ loqusdb/utils/annotate.py,sha256=vOHlLkenwCCLXh-cjerd9cW68eZfEtgvP0IwWh-oBHs,2347
44
44
  loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
45
- loqusdb/utils/delete.py,sha256=-ddBM_QXKzlUN6egEJggKzXX1P-WEdi92HgaD1DJRtg,4843
46
- loqusdb/utils/load.py,sha256=LyYHPwaCVV-7edbaoPahah05yPfG9H4VCYSg1gmz_M0,9063
45
+ loqusdb/utils/delete.py,sha256=uj1m5i12GjUhhnCnIbh6D7BMG-oMDk6bfrJxk8zpSxE,5208
46
+ loqusdb/utils/load.py,sha256=GgJyTLSOpgcEqjvo9RXzcacQLzHZYtXF_tkyp_XJwOs,9448
47
47
  loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
48
- loqusdb/utils/profiling.py,sha256=3OizF7CpYvSl9kyl2g4KGJxbIRUqWfmfLxn3843XYDk,9164
48
+ loqusdb/utils/profiling.py,sha256=uISq4xfRNPPedoYXS_D4dXphq8odDogfMBm_XfHBTpE,9232
49
49
  loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
50
50
  loqusdb/utils/variant.py,sha256=U6nMZRUf5NDDQ74nG0HBCLMnFQVgFAT6eHll_F2uiwc,2087
51
- loqusdb/utils/vcf.py,sha256=ybmrTBEPYa0FbUXo8ttlwATk13RnKjX9eIDbRDwCiVE,5175
52
- loqusdb-2.7.18.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
- loqusdb-2.7.18.dist-info/METADATA,sha256=yTsm5y8i4u_chGfnAbMm5ZfjorJOVZQfl4qK9vdJDCk,5321
54
- loqusdb-2.7.18.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
- loqusdb-2.7.18.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
- loqusdb-2.7.18.dist-info/RECORD,,
51
+ loqusdb/utils/vcf.py,sha256=og8JBYock31v_0CnsoRhuKIJCurLCIFW8PCCQIRWF-Q,5207
52
+ loqusdb-2.7.19.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
+ loqusdb-2.7.19.dist-info/METADATA,sha256=w1TxeA5Lz1cYqetHRw0UGvuEUS82WfpOgxThm5hQOYs,5321
54
+ loqusdb-2.7.19.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
+ loqusdb-2.7.19.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
+ loqusdb-2.7.19.dist-info/RECORD,,