loqusdb 2.7.17__py3-none-any.whl → 2.7.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loqusdb/__init__.py CHANGED
@@ -4,7 +4,7 @@ from pymongo import ASCENDING, IndexModel
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
- __version__ = "2.7.17"
7
+ __version__ = "2.7.19"
8
8
 
9
9
  INDEXES = {
10
10
  "variant": [
@@ -24,23 +24,25 @@ def get_maf(variant):
24
24
  return variant.INFO.get("MAF")
25
25
 
26
26
 
27
- def build_profile_variant(variant):
27
+ def build_profile_variant(variant, keep_chr_prefix=None):
28
28
  """Returns a ProfileVariant object
29
29
 
30
30
  Args:
31
31
  variant (cyvcf2.Variant)
32
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
32
33
 
33
34
  Returns:
34
35
  variant (models.ProfileVariant)
35
36
  """
36
37
 
37
38
  chrom = variant.CHROM
38
- if chrom.startswith(("chr", "CHR", "Chr")):
39
- chrom = chrom[3:]
39
+ if not keep_chr_prefix:
40
+ if chrom.startswith(("chr", "CHR", "Chr")):
41
+ chrom = chrom[3:]
40
42
 
41
43
  pos = int(variant.POS)
42
44
 
43
- variant_id = get_variant_id(variant)
45
+ variant_id = get_variant_id(variant, keep_chr_prefix)
44
46
 
45
47
  ref = variant.REF
46
48
  alt = variant.ALT[0]
@@ -32,11 +32,21 @@ def check_par(chrom, pos, genome_build=None):
32
32
  )
33
33
 
34
34
 
35
- def get_variant_id(variant):
36
- """Get a variant id on the format chrom_pos_ref_alt"""
35
+ def get_variant_id(variant, keep_chr_prefix=None):
36
+ """Get a variant id on the format chrom_pos_ref_alt
37
+
38
+ Args:
39
+ variant (cyvcf2.Variant)
40
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
41
+
42
+ Returns:
43
+ variant_id (str): Identifier on the format chrom_pos_ref_alt
44
+ """
45
+
37
46
  chrom = variant.CHROM
38
- if chrom.lower().startswith("chr"):
39
- chrom = chrom[3:]
47
+ if not keep_chr_prefix:
48
+ if chrom.lower().startswith("chr"):
49
+ chrom = chrom[3:]
40
50
  return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
41
51
 
42
52
 
@@ -68,11 +78,12 @@ def is_greater(a, b):
68
78
  return a_chrom == b_chrom and a.pos > b.pos
69
79
 
70
80
 
71
- def get_coords(variant):
81
+ def get_coords(variant, keep_chr_prefix):
72
82
  """Returns a dictionary with position information
73
83
 
74
84
  Args:
75
85
  variant(cyvcf2.Variant)
86
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
76
87
 
77
88
  Returns:
78
89
  coordinates(dict)
@@ -86,8 +97,9 @@ def get_coords(variant):
86
97
  "end": None,
87
98
  }
88
99
  chrom = variant.CHROM
89
- if chrom.startswith(("chr", "CHR", "Chr")):
90
- chrom = chrom[3:]
100
+ if not keep_chr_prefix:
101
+ if chrom.startswith(("chr", "CHR", "Chr")):
102
+ chrom = chrom[3:]
91
103
  coordinates["chrom"] = chrom
92
104
  end_chrom = chrom
93
105
 
@@ -107,8 +119,9 @@ def get_coords(variant):
107
119
  if sv_type == "BND":
108
120
  other_coordinates = alt.strip("ATCGN").strip("[]").split(":")
109
121
  end_chrom = other_coordinates[0]
110
- if end_chrom.startswith(("chr", "CHR", "Chr")):
111
- end_chrom = end_chrom[3:]
122
+ if not keep_chr_prefix:
123
+ if end_chrom.startswith(("chr", "CHR", "Chr")):
124
+ end_chrom = end_chrom[3:]
112
125
 
113
126
  end = int(other_coordinates[1])
114
127
 
@@ -148,6 +161,8 @@ def build_variant(
148
161
  case_id: Optional[str] = None,
149
162
  gq_threshold: Optional[int] = None,
150
163
  gq_qual: Optional[bool] = False,
164
+ keep_chr_prefix: Optional[bool] = False,
165
+ ignore_gq_if_unset: Optional[bool] = False,
151
166
  genome_build: Optional[str] = None,
152
167
  ) -> Variant:
153
168
  """Return a Variant object
@@ -164,6 +179,9 @@ def build_variant(
164
179
  case_id(str): The case id
165
180
  gq_threshold(int): Genotype Quality threshold
166
181
  gq_qual(bool): Use variant.QUAL for quality instead of GQ
182
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
183
+ ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
184
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
167
185
 
168
186
  Return:
169
187
  formated_variant(models.Variant): A variant dictionary
@@ -176,14 +194,14 @@ def build_variant(
176
194
  sv = True
177
195
 
178
196
  # chrom_pos_ref_alt
179
- variant_id = get_variant_id(variant)
197
+ variant_id = get_variant_id(variant, keep_chr_prefix)
180
198
 
181
199
  ref = variant.REF
182
200
  # ALT is an array in cyvcf2
183
201
  # We always assume split and normalized VCFs
184
202
  alt = variant.ALT[0]
185
203
 
186
- coordinates = get_coords(variant)
204
+ coordinates = get_coords(variant, keep_chr_prefix)
187
205
  chrom = coordinates["chrom"]
188
206
  pos = coordinates["pos"]
189
207
 
@@ -201,14 +219,15 @@ def build_variant(
201
219
  ind_pos = ind_obj["ind_index"]
202
220
 
203
221
  if gq_qual:
204
- gq = 0
222
+ gq = -1
205
223
  if variant.QUAL:
206
224
  gq = int(variant.QUAL)
207
225
 
208
226
  if not gq_qual:
209
227
  gq = int(variant.gt_quals[ind_pos])
210
228
 
211
- if gq_threshold and gq < gq_threshold:
229
+ # When gq is missing in FORMAT cyvcf2 assigns a score of -1
230
+ if (gq_threshold and 0 <= gq < gq_threshold) or (gq == -1 and not ignore_gq_if_unset):
212
231
  continue
213
232
 
214
233
  genotype = GENOTYPE_MAP[variant.gt_types[ind_pos]]
@@ -21,6 +21,7 @@ LOG = logging.getLogger(__name__)
21
21
  def annotate(ctx, variant_file, sv):
22
22
  """Annotate the variants in a VCF"""
23
23
  adapter = ctx.obj["adapter"]
24
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
24
25
 
25
26
  variant_path = os.path.abspath(variant_file)
26
27
 
@@ -40,9 +41,9 @@ def annotate(ctx, variant_file, sv):
40
41
  start_inserting = datetime.now()
41
42
 
42
43
  if sv:
43
- annotated_variants = annotate_svs(adapter, vcf_obj)
44
+ annotated_variants = annotate_svs(adapter, vcf_obj, keep_chr_prefix)
44
45
  else:
45
- annotated_variants = annotate_snvs(adapter, vcf_obj)
46
+ annotated_variants = annotate_snvs(adapter, vcf_obj, keep_chr_prefix)
46
47
  # try:
47
48
  for variant in annotated_variants:
48
49
  click.echo(str(variant).rstrip())
loqusdb/commands/cli.py CHANGED
@@ -55,11 +55,30 @@ LOG = logging.getLogger(__name__)
55
55
  type=click.Choice([GRCH37, GRCH38]),
56
56
  help="Specify what genome build to use",
57
57
  )
58
+ @click.option(
59
+ "--keep-chr-prefix",
60
+ is_flag=True,
61
+ default=False,
62
+ show_default=True,
63
+ help="Retain the 'chr/Chr/CHR' prefix for chromosomes if it is present",
64
+ )
58
65
  @click.option("-v", "--verbose", is_flag=True)
59
66
  @click.version_option(__version__)
60
67
  @click.pass_context
61
68
  def cli(
62
- ctx, database, username, password, authdb, port, host, uri, verbose, config, test, genome_build
69
+ ctx,
70
+ database,
71
+ username,
72
+ password,
73
+ authdb,
74
+ port,
75
+ host,
76
+ uri,
77
+ verbose,
78
+ config,
79
+ test,
80
+ genome_build,
81
+ keep_chr_prefix,
63
82
  ):
64
83
  """loqusdb: manage a local variant count database."""
65
84
  loglevel = "INFO"
@@ -103,6 +122,7 @@ def cli(
103
122
  adapter = MongoAdapter(client, db_name=database)
104
123
 
105
124
  genome_build = genome_build or configs.get("genome_build") or GRCH37
125
+ keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
106
126
 
107
127
  ctx.obj = {}
108
128
  ctx.obj["db"] = database
@@ -114,3 +134,4 @@ def cli(
114
134
  ctx.obj["adapter"] = adapter
115
135
  ctx.obj["version"] = __version__
116
136
  ctx.obj["genome_build"] = genome_build
137
+ ctx.obj["keep_chr_prefix"] = keep_chr_prefix
@@ -35,6 +35,7 @@ def delete(ctx, family_file, family_type, case_id):
35
35
  ctx.abort()
36
36
 
37
37
  adapter = ctx.obj["adapter"]
38
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
38
39
 
39
40
  # Get a ped_parser.Family object from family file
40
41
  family = None
@@ -59,7 +60,12 @@ def delete(ctx, family_file, family_type, case_id):
59
60
  genome_build = ctx.obj["genome_build"]
60
61
  start_deleting = datetime.now()
61
62
  try:
62
- delete_command(adapter=adapter, case_obj=existing_case, genome_build=genome_build)
63
+ delete_command(
64
+ adapter=adapter,
65
+ case_obj=existing_case,
66
+ genome_build=genome_build,
67
+ keep_chr_prefix=keep_chr_prefix,
68
+ )
63
69
  except (CaseError, IOError) as error:
64
70
  LOG.warning(error)
65
71
  ctx.abort()
loqusdb/commands/load.py CHANGED
@@ -95,6 +95,13 @@ def validate_profile_threshold(ctx, param, value):
95
95
  show_default=True,
96
96
  help="Apply GQ threshold only to SNV variants",
97
97
  )
98
+ @click.option(
99
+ "--ignore-gq-if-unset",
100
+ is_flag=True,
101
+ default=False,
102
+ show_default=True,
103
+ help="Ignore GQ threshold if GQ (or the QUAL field for --qual-gq) is unset in VCF",
104
+ )
98
105
  @click.pass_context
99
106
  def load(
100
107
  ctx,
@@ -112,6 +119,7 @@ def load(
112
119
  soft_threshold,
113
120
  qual_gq,
114
121
  snv_gq_only,
122
+ ignore_gq_if_unset,
115
123
  ):
116
124
  """Load the variants of a case
117
125
 
@@ -140,7 +148,7 @@ def load(
140
148
 
141
149
  adapter = ctx.obj["adapter"]
142
150
  genome_build = ctx.obj["genome_build"]
143
-
151
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
144
152
  start_inserting = datetime.now()
145
153
 
146
154
  try:
@@ -154,12 +162,14 @@ def load(
154
162
  case_id=case_id,
155
163
  gq_threshold=gq_threshold,
156
164
  snv_gq_only=snv_gq_only,
165
+ keep_chr_prefix=keep_chr_prefix,
157
166
  qual_gq=qual_gq,
158
167
  max_window=max_window,
159
168
  profile_file=variant_profile_path,
160
169
  hard_threshold=hard_threshold,
161
170
  soft_threshold=soft_threshold,
162
171
  genome_build=genome_build,
172
+ ignore_gq_if_unset=ignore_gq_if_unset,
163
173
  )
164
174
  except (SyntaxError, CaseError, IOError) as error:
165
175
  LOG.warning(error)
@@ -60,13 +60,14 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
60
60
  """
61
61
 
62
62
  adapter = ctx.obj["adapter"]
63
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
63
64
 
64
65
  LOG.info("Running loqusdb profile")
65
66
 
66
67
  if check_vcf:
67
68
  LOG.info(f"Check if profile in {check_vcf} has match in database")
68
69
  vcf_file = check_vcf
69
- profiles = get_profiles(adapter, vcf_file)
70
+ profiles = get_profiles(adapter, vcf_file, keep_chr_prefix)
70
71
  duplicate = check_duplicates(adapter, profiles, profile_threshold)
71
72
 
72
73
  if duplicate is not None:
@@ -81,11 +82,11 @@ def load_profile(ctx, load, variant_file, update, stats, profile_threshold, chec
81
82
  if variant_file is not None:
82
83
  vcf_path = variant_file
83
84
  LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
84
- load_profile_variants(adapter, vcf_path)
85
+ load_profile_variants(adapter, vcf_path, keep_chr_prefix)
85
86
 
86
87
  if update:
87
88
  LOG.info("Updates profiles in database")
88
- update_profiles(adapter)
89
+ update_profiles(adapter, keep_chr_prefix)
89
90
 
90
91
  if stats:
91
92
  LOG.info("Prints profile stats")
loqusdb/utils/annotate.py CHANGED
@@ -31,21 +31,7 @@ def annotate_variant(variant, var_obj=None):
31
31
  return variant
32
32
 
33
33
 
34
- def annotate_snv(adpter, variant):
35
- """Annotate an SNV/INDEL variant
36
-
37
- Args:
38
- adapter(loqusdb.plugin.adapter)
39
- variant(cyvcf2.Variant)
40
- """
41
- variant_id = get_variant_id(variant)
42
- variant_obj = adapter.get_variant(variant={"_id": variant_id})
43
-
44
- annotated_variant = annotated_variant(variant, variant_obj)
45
- return annotated_variant
46
-
47
-
48
- def annotate_svs(adapter, vcf_obj):
34
+ def annotate_svs(adapter, vcf_obj, keep_chr_prefix):
49
35
  """Annotate all SV variants in a VCF
50
36
 
51
37
  Args:
@@ -56,14 +42,14 @@ def annotate_svs(adapter, vcf_obj):
56
42
  variant(cyvcf2.Variant)
57
43
  """
58
44
  for nr_variants, variant in enumerate(vcf_obj, 1):
59
- variant_info = get_coords(variant)
45
+ variant_info = get_coords(variant, keep_chr_prefix)
60
46
  match = adapter.get_structural_variant(variant_info)
61
47
  if match:
62
48
  annotate_variant(variant, match)
63
49
  yield variant
64
50
 
65
51
 
66
- def annotate_snvs(adapter, vcf_obj):
52
+ def annotate_snvs(adapter, vcf_obj, keep_chr_prefix):
67
53
  """Annotate all variants in a VCF
68
54
 
69
55
  Args:
@@ -77,7 +63,7 @@ def annotate_snvs(adapter, vcf_obj):
77
63
 
78
64
  for nr_variants, variant in enumerate(vcf_obj, 1):
79
65
  # Add the variant to current batch
80
- variants[get_variant_id(variant)] = variant
66
+ variants[get_variant_id(variant, keep_chr_prefix)] = variant
81
67
  # If batch len == 1000 we annotate the batch
82
68
  if (nr_variants % 1000) == 0:
83
69
 
loqusdb/utils/delete.py CHANGED
@@ -9,7 +9,9 @@ from loqusdb.build_models.variant import build_variant
9
9
  LOG = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def delete(adapter, case_obj, update=False, existing_case=False, genome_build=None):
12
+ def delete(
13
+ adapter, case_obj, keep_chr_prefix=None, update=False, existing_case=False, genome_build=None
14
+ ):
13
15
  """Delete a case and all of it's variants from the database.
14
16
 
15
17
  Args:
@@ -18,6 +20,7 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
18
20
  update(bool): If we are in the middle of an update
19
21
  existing_case(models.Case): If something failed during an update we need to revert
20
22
  to the original case
23
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
21
24
 
22
25
  """
23
26
  # This will overwrite the updated case with the previous one
@@ -36,18 +39,22 @@ def delete(adapter, case_obj, update=False, existing_case=False, genome_build=No
36
39
  if file_type == "vcf_path":
37
40
  LOG.info("deleting variants")
38
41
  delete_variants(
39
- adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, genome_build=genome_build
42
+ adapter=adapter,
43
+ vcf_obj=vcf_obj,
44
+ keep_chr_prefix=keep_chr_prefix,
45
+ case_obj=case_obj,
46
+ genome_build=genome_build,
40
47
  )
41
48
  elif file_type == "vcf_sv_path":
42
49
  LOG.info("deleting structural variants")
43
50
  delete_structural_variants(
44
- adapter=adapter,
45
- vcf_obj=vcf_obj,
46
- case_obj=case_obj,
51
+ adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, keep_chr_prefix=keep_chr_prefix
47
52
  )
48
53
 
49
54
 
50
- def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None):
55
+ def delete_variants(
56
+ adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None, genome_build=None
57
+ ):
51
58
  """Delete variants for a case in the database
52
59
 
53
60
  Args:
@@ -69,7 +76,11 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
69
76
  variant_list = []
70
77
  for variant in vcf_obj:
71
78
  formated_variant = build_variant(
72
- variant=variant, case_obj=case_obj, case_id=case_id, genome_build=genome_build
79
+ variant=variant,
80
+ case_obj=case_obj,
81
+ keep_chr_prefix=keep_chr_prefix,
82
+ case_id=case_id,
83
+ genome_build=genome_build,
73
84
  )
74
85
 
75
86
  if not formated_variant:
@@ -109,7 +120,7 @@ def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None)
109
120
  return nr_deleted
110
121
 
111
122
 
112
- def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
123
+ def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None):
113
124
  """Delete structural variants for a case in the database
114
125
 
115
126
  Args:
@@ -130,9 +141,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
130
141
 
131
142
  for variant in vcf_obj:
132
143
  formated_variant = build_variant(
133
- variant=variant,
134
- case_obj=case_obj,
135
- case_id=case_id,
144
+ variant=variant, case_obj=case_obj, case_id=case_id, keep_chr_prefix=keep_chr_prefix
136
145
  )
137
146
 
138
147
  if not formated_variant:
loqusdb/utils/load.py CHANGED
@@ -32,6 +32,7 @@ def load_database(
32
32
  skip_case_id=False,
33
33
  gq_threshold=None,
34
34
  snv_gq_only=False,
35
+ keep_chr_prefix=False,
35
36
  qual_gq=False,
36
37
  case_id=None,
37
38
  max_window=3000,
@@ -39,6 +40,7 @@ def load_database(
39
40
  hard_threshold=0.95,
40
41
  soft_threshold=0.9,
41
42
  genome_build=None,
43
+ ignore_gq_if_unset=False,
42
44
  ):
43
45
  """Load the database with a case and its variants
44
46
 
@@ -50,12 +52,15 @@ def load_database(
50
52
  family_type(str): Format of family file
51
53
  skip_case_id(bool): If no case information should be added to variants
52
54
  gq_threshold(int): If only quality variants should be considered
55
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
53
56
  qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality
54
57
  case_id(str): If different case id than the one in family file should be used
55
58
  max_window(int): Specify the max size for sv windows
56
59
  check_profile(bool): Does profile check if True
57
60
  hard_threshold(float): Rejects load if hamming distance above this is found
58
61
  soft_threshold(float): Stores similar samples if hamming distance above this is found
62
+ genome_build(str): Store the genome version
63
+ ignore_gq_if_unset(bool): Ignore the GQ threshold check for variants that do not have a GQ or QUAL set
59
64
 
60
65
  Returns:
61
66
  nr_inserted(int)
@@ -65,7 +70,7 @@ def load_database(
65
70
  nr_variants = None
66
71
  vcf_individuals = None
67
72
  if variant_file:
68
- vcf_info = check_vcf(variant_file)
73
+ vcf_info = check_vcf(variant_file, keep_chr_prefix)
69
74
  nr_variants = vcf_info["nr_variants"]
70
75
  variant_type = vcf_info["variant_type"]
71
76
  vcf_files.append(variant_file)
@@ -75,7 +80,7 @@ def load_database(
75
80
  nr_sv_variants = None
76
81
  sv_individuals = None
77
82
  if sv_file:
78
- vcf_info = check_vcf(sv_file, "sv")
83
+ vcf_info = check_vcf(sv_file, keep_chr_prefix, "sv")
79
84
  nr_sv_variants = vcf_info["nr_variants"]
80
85
  vcf_files.append(sv_file)
81
86
  sv_individuals = vcf_info["individuals"]
@@ -83,7 +88,7 @@ def load_database(
83
88
  profiles = None
84
89
  matches = None
85
90
  if profile_file:
86
- profiles = get_profiles(adapter, profile_file)
91
+ profiles = get_profiles(adapter, profile_file, keep_chr_prefix)
87
92
  ###Check if any profile already exists
88
93
  matches = profile_match(
89
94
  adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
@@ -149,9 +154,11 @@ def load_database(
149
154
  skip_case_id=skip_case_id,
150
155
  gq_threshold=gq_threshold if not snv_gq_only or variant_type == "snv" else None,
151
156
  qual_gq=qual_gq,
157
+ keep_chr_prefix=keep_chr_prefix,
152
158
  max_window=max_window,
153
159
  variant_type=variant_type,
154
160
  genome_build=genome_build,
161
+ ignore_gq_if_unset=ignore_gq_if_unset,
155
162
  )
156
163
  except Exception as err:
157
164
  # If something went wrong do a rollback
@@ -196,21 +203,27 @@ def load_variants(
196
203
  skip_case_id=False,
197
204
  gq_threshold=None,
198
205
  qual_gq=False,
206
+ keep_chr_prefix=False,
199
207
  max_window=3000,
200
208
  variant_type="snv",
201
209
  genome_build=None,
210
+ ignore_gq_if_unset=False,
202
211
  ):
203
212
  """Load variants for a family into the database.
204
213
 
205
214
  Args:
206
215
  adapter (loqusdb.plugins.Adapter): initialized plugin
216
+ vcf_obj(cyvcf2.VCF): Iterable with cyvcf2.Variant
207
217
  case_obj(Case): dict with case information
208
- nr_variants(int)
209
218
  skip_case_id (bool): whether to include the case id on variant level
210
219
  or not
220
+ keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
211
221
  gq_threshold(int)
222
+ qual_gq(bool): whether to use QUAL instead of GQ
212
223
  max_window(int): Specify the max size for sv windows
213
224
  variant_type(str): 'sv' or 'snv'
225
+ genome_build(str): Genome version. Ex. GRCH37
226
+ ignore_gq_if_unset (bool): whether to add entries that have missing GQ or QUAL field
214
227
 
215
228
  Returns:
216
229
  nr_inserted(int)
@@ -229,7 +242,14 @@ def load_variants(
229
242
 
230
243
  variants = (
231
244
  build_variant(
232
- variant, case_obj, case_id, gq_threshold, qual_gq, genome_build=genome_build
245
+ variant,
246
+ case_obj,
247
+ case_id,
248
+ gq_threshold,
249
+ qual_gq,
250
+ keep_chr_prefix,
251
+ ignore_gq_if_unset,
252
+ genome_build=genome_build,
233
253
  )
234
254
  for variant in bar
235
255
  )
@@ -249,7 +269,7 @@ def load_variants(
249
269
  return nr_inserted
250
270
 
251
271
 
252
- def load_profile_variants(adapter, variant_file):
272
+ def load_profile_variants(adapter, variant_file, keep_chr_prefix=None):
253
273
  """
254
274
 
255
275
  Loads variants used for profiling
@@ -261,7 +281,7 @@ def load_profile_variants(adapter, variant_file):
261
281
 
262
282
  """
263
283
 
264
- vcf_info = check_vcf(variant_file)
284
+ vcf_info = check_vcf(variant_file, keep_chr_prefix)
265
285
  variant_type = vcf_info["variant_type"]
266
286
 
267
287
  if variant_type != "snv":
@@ -270,5 +290,5 @@ def load_profile_variants(adapter, variant_file):
270
290
 
271
291
  vcf = get_vcf(variant_file)
272
292
 
273
- profile_variants = [build_profile_variant(variant) for variant in vcf]
293
+ profile_variants = [build_profile_variant(variant, keep_chr_prefix) for variant in vcf]
274
294
  adapter.add_profile_variants(profile_variants)
@@ -11,7 +11,7 @@ from .vcf import get_file_handle
11
11
  LOG = logging.getLogger(__name__)
12
12
 
13
13
 
14
- def get_profiles(adapter, vcf_file):
14
+ def get_profiles(adapter, vcf_file, keep_chr_prefix):
15
15
  """Given a vcf, get a profile string for each sample in the vcf
16
16
  based on the profile variants in the database
17
17
 
@@ -44,7 +44,7 @@ def get_profiles(adapter, vcf_file):
44
44
  found_variant = False
45
45
  for variant in vcf(region):
46
46
 
47
- variant_id = get_variant_id(variant)
47
+ variant_id = get_variant_id(variant, keep_chr_prefix)
48
48
 
49
49
  # If variant id i.e. chrom_pos_ref_alt matches
50
50
  if variant_id == profile_variant["_id"]:
@@ -183,7 +183,7 @@ def compare_profiles(profile1, profile2):
183
183
  return similarity_ratio
184
184
 
185
185
 
186
- def update_profiles(adapter):
186
+ def update_profiles(adapter, keep_chr_prefix):
187
187
  """
188
188
  For all cases having vcf_path, update the profile string for the samples
189
189
 
@@ -198,7 +198,7 @@ def update_profiles(adapter):
198
198
  # case with new profiled individuals.
199
199
  if case.get("profile_path"):
200
200
 
201
- profiles = get_profiles(adapter, case["profile_path"])
201
+ profiles = get_profiles(adapter, case["profile_path"], keep_chr_prefix)
202
202
  profiled_individuals = deepcopy(case["individuals"])
203
203
 
204
204
  for individual in profiled_individuals:
loqusdb/utils/vcf.py CHANGED
@@ -89,7 +89,7 @@ def check_sorting(previous_chrom, previous_pos, current_chrom, current_pos):
89
89
  pass
90
90
 
91
91
 
92
- def check_vcf(vcf_path, expected_type="snv"):
92
+ def check_vcf(vcf_path, keep_chr_prefix=None, expected_type="snv"):
93
93
  """Check if there are any problems with the vcf file
94
94
 
95
95
  Args:
@@ -113,7 +113,7 @@ def check_vcf(vcf_path, expected_type="snv"):
113
113
  previous_pos = None
114
114
  previous_chrom = None
115
115
 
116
- posititon_variants = set()
116
+ position_variants = set()
117
117
 
118
118
  nr_variants = 0
119
119
  for nr_variants, variant in enumerate(vcf, 1):
@@ -134,36 +134,36 @@ def check_vcf(vcf_path, expected_type="snv"):
134
134
  variant_id = "{0}_{1}".format(current_chrom, current_pos)
135
135
  # For SNVs we can create a proper variant id with chrom_pos_ref_alt
136
136
  if variant_type == "snv":
137
- variant_id = get_variant_id(variant)
137
+ variant_id = get_variant_id(variant, keep_chr_prefix)
138
138
 
139
139
  # Initiate variables
140
140
  if not previous_chrom:
141
141
  previous_chrom = current_chrom
142
142
  previous_pos = current_pos
143
- posititon_variants = {variant_id}
143
+ position_variants = {variant_id}
144
144
  continue
145
145
 
146
146
  # Update variables if new chromosome
147
147
  if current_chrom != previous_chrom:
148
148
  previous_chrom = current_chrom
149
149
  previous_pos = current_pos
150
- posititon_variants = {variant_id}
150
+ position_variants = {variant_id}
151
151
  continue
152
152
 
153
153
  if variant_type == "snv":
154
154
  # Check if variant is unique
155
155
  if current_pos == previous_pos:
156
- if variant_id in posititon_variants:
156
+ if variant_id in position_variants:
157
157
  raise VcfError("Variant {0} occurs several times" " in vcf".format(variant_id))
158
158
  else:
159
- posititon_variants.add(variant_id)
159
+ position_variants.add(variant_id)
160
160
  # Check if vcf is sorted
161
161
  else:
162
162
  if not current_pos >= previous_pos:
163
163
  raise VcfError("Vcf if not sorted in a correct way")
164
164
  previous_pos = current_pos
165
- # Reset posititon_variants since we are on a new position
166
- posititon_variants = {variant_id}
165
+ # Reset position_variants since we are on a new position
166
+ position_variants = {variant_id}
167
167
 
168
168
  if variant_type != expected_type:
169
169
  raise VcfError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: loqusdb
3
- Version: 2.7.17
3
+ Version: 2.7.19
4
4
  Summary: A simple observation count database
5
5
  License: MIT
6
6
  Author: Your Name
@@ -1,17 +1,17 @@
1
- loqusdb/__init__.py,sha256=OKW6E-XujZmZRrXRSOInDNnGqybJIkbeIBvnFcWqhuo,1688
1
+ loqusdb/__init__.py,sha256=Zs9AtDiQwuASVgXDU0xzuWv8RhaadjMaa9WD4D7BMVc,1688
2
2
  loqusdb/__main__.py,sha256=8FGKySAGaWSzAYMj6HRsxeyiME3V01Idt7HrmN7pSYY,397
3
3
  loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  loqusdb/build_models/case.py,sha256=AByutEYK2N3kS9JFvyZfPKNZdCpZHCSD0nNHAgaU1Cs,4127
5
- loqusdb/build_models/profile_variant.py,sha256=TbSxfVjESstS_FgbkOW4NQwMQVeTyhn9oc9yPZmDhzI,1021
6
- loqusdb/build_models/variant.py,sha256=2ate8viYhHK1yX7UvCzEPgfuuXwTgAApU_2E5jl1ZO4,6934
5
+ loqusdb/build_models/profile_variant.py,sha256=WzWhxq4HNvf67IknyBWYnMHQzPMZ9eitw_so6lfOkPc,1166
6
+ loqusdb/build_models/variant.py,sha256=buIQr8GsNUBBtgf78a0n5I_GiMEogohSEQJibVUuM5Y,7815
7
7
  loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
8
- loqusdb/commands/annotate.py,sha256=748kImopE5WbaO1nuv3WUgIqezWFSsi7SBeWhOz26-s,1384
9
- loqusdb/commands/cli.py,sha256=wJD5S1BoCxtRTAobd1QtmQpzlngJg-mt1nsyD92fDD4,3176
10
- loqusdb/commands/delete.py,sha256=R6ysHKSMw1mmL4ZbktoUIKzdzDLQ3314YPYhIy1myic,1979
8
+ loqusdb/commands/annotate.py,sha256=MGU9EerKYsFx1lkyjQ6ZMUKYuShi0uSTPJCS0cyxq7U,1467
9
+ loqusdb/commands/cli.py,sha256=XRprLQaENiLdqXG_7ugCC9jTcG7Uh54_M0KZj1ERFaM,3542
10
+ loqusdb/commands/delete.py,sha256=BRtm6Uade3l97FBcKFNkiYjks84AhuXYo-2QD8E74A4,2120
11
11
  loqusdb/commands/export.py,sha256=HKoRzUo_BHNOdw_TcKUId9TTowi8VJVGqnuDlK-FqFE,3531
12
12
  loqusdb/commands/identity.py,sha256=KLA9c8e6cJFDxtqIa1G6zdHTHK1sz2b3v1Utdtik_4k,787
13
- loqusdb/commands/load.py,sha256=6rrt_XIdUHhNc-HisMRS7G-bWkgYhqggDjbEQubYmUQ,4635
14
- loqusdb/commands/load_profile.py,sha256=cflCbF9f77_HCH8xPnN8zSSocvIffRMnC2LPE0j7Xq8,3336
13
+ loqusdb/commands/load.py,sha256=pHtjldblUM-HFFgcN5UtoaxGhYmo1yeexqGq4I427qk,4996
14
+ loqusdb/commands/load_profile.py,sha256=x-T2bzi2SL5kwZhY_3hHQCtGDLao1xkxj1pZaOnzs4U,3436
15
15
  loqusdb/commands/migrate.py,sha256=2C8YL-zVqnpnqg3JIyUr0rbVnb8-AGPVWNhicHnPKLo,667
16
16
  loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1492
17
17
  loqusdb/commands/update.py,sha256=zz3wueaJVqJ1FKact-rpY2az__5oa1LnZKf7mgqNGPk,3211
@@ -40,17 +40,17 @@ loqusdb/resources/loqusdb.20181005.gz,sha256=DI8CLI7fPnIAjM25Avraz-C7KQkOKsfnhgZ
40
40
  loqusdb/resources/maf_50_sites_GRCh37.vcf.gz,sha256=BoD1_xZ-Rr8DTWCMNlQGh7gz1K8FA-j2nC4jKn_eB2A,5260
41
41
  loqusdb/resources/maf_50_sites_GRCh38.vcf.gz,sha256=6T4iyrIr6yx1HpgobzAsh305BO1JX0oGj48nFiYt2QM,9037
42
42
  loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
- loqusdb/utils/annotate.py,sha256=cPNWlhsv6yoe3lxNfa9DytO5eACuM_mOJJw_mglVMN0,2646
43
+ loqusdb/utils/annotate.py,sha256=vOHlLkenwCCLXh-cjerd9cW68eZfEtgvP0IwWh-oBHs,2347
44
44
  loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
45
- loqusdb/utils/delete.py,sha256=-ddBM_QXKzlUN6egEJggKzXX1P-WEdi92HgaD1DJRtg,4843
46
- loqusdb/utils/load.py,sha256=1UVB5jUy1DESZdWnRrMcyfFiUgqmj9l-Bm0DZOHetfg,8428
45
+ loqusdb/utils/delete.py,sha256=uj1m5i12GjUhhnCnIbh6D7BMG-oMDk6bfrJxk8zpSxE,5208
46
+ loqusdb/utils/load.py,sha256=GgJyTLSOpgcEqjvo9RXzcacQLzHZYtXF_tkyp_XJwOs,9448
47
47
  loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
48
- loqusdb/utils/profiling.py,sha256=3OizF7CpYvSl9kyl2g4KGJxbIRUqWfmfLxn3843XYDk,9164
48
+ loqusdb/utils/profiling.py,sha256=uISq4xfRNPPedoYXS_D4dXphq8odDogfMBm_XfHBTpE,9232
49
49
  loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
50
50
  loqusdb/utils/variant.py,sha256=U6nMZRUf5NDDQ74nG0HBCLMnFQVgFAT6eHll_F2uiwc,2087
51
- loqusdb/utils/vcf.py,sha256=ybmrTBEPYa0FbUXo8ttlwATk13RnKjX9eIDbRDwCiVE,5175
52
- loqusdb-2.7.17.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
- loqusdb-2.7.17.dist-info/METADATA,sha256=WH2QGz7PhKif97YZxHnOOxmiaN96ZAd_zGCfUnm2GGU,5321
54
- loqusdb-2.7.17.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
- loqusdb-2.7.17.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
- loqusdb-2.7.17.dist-info/RECORD,,
51
+ loqusdb/utils/vcf.py,sha256=og8JBYock31v_0CnsoRhuKIJCurLCIFW8PCCQIRWF-Q,5207
52
+ loqusdb-2.7.19.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
+ loqusdb-2.7.19.dist-info/METADATA,sha256=w1TxeA5Lz1cYqetHRw0UGvuEUS82WfpOgxThm5hQOYs,5321
54
+ loqusdb-2.7.19.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
+ loqusdb-2.7.19.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
+ loqusdb-2.7.19.dist-info/RECORD,,