loqusdb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loqusdb/__init__.py CHANGED
@@ -4,7 +4,7 @@ from pymongo import ASCENDING, IndexModel
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
- __version__ = "2.7.16"
7
+ __version__ = "2.7.18"
8
8
 
9
9
  INDEXES = {
10
10
  "variant": [
@@ -148,6 +148,7 @@ def build_variant(
148
148
  case_id: Optional[str] = None,
149
149
  gq_threshold: Optional[int] = None,
150
150
  gq_qual: Optional[bool] = False,
151
+ ignore_gq_if_unset: Optional[bool] = False,
151
152
  genome_build: Optional[str] = None,
152
153
  ) -> Variant:
153
154
  """Return a Variant object
@@ -164,6 +165,8 @@ def build_variant(
164
165
  case_id(str): The case id
165
166
  gq_threshold(int): Genotype Quality threshold
166
167
  gq_qual(bool): Use variant.QUAL for quality instead of GQ
168
+ ignore_gq_if_unset(bool): Ignore GQ threshold check for variants that do not have GQ or QUAL set.
169
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
167
170
 
168
171
  Return:
169
172
  formated_variant(models.Variant): A variant dictionary
@@ -201,14 +204,15 @@ def build_variant(
201
204
  ind_pos = ind_obj["ind_index"]
202
205
 
203
206
  if gq_qual:
204
- gq = 0
207
+ gq = -1
205
208
  if variant.QUAL:
206
209
  gq = int(variant.QUAL)
207
210
 
208
211
  if not gq_qual:
209
212
  gq = int(variant.gt_quals[ind_pos])
210
213
 
211
- if gq_threshold and gq < gq_threshold:
214
+ # When gq is missing in FORMAT cyvcf2 assigns a score of -1
215
+ if (gq_threshold and 0 <= gq < gq_threshold) or (gq == -1 and not ignore_gq_if_unset):
212
216
  continue
213
217
 
214
218
  genotype = GENOTYPE_MAP[variant.gt_types[ind_pos]]
loqusdb/commands/load.py CHANGED
@@ -88,6 +88,20 @@ def validate_profile_threshold(ctx, param, value):
88
88
  callback=validate_profile_threshold,
89
89
  help="profile hamming distance to store similar individuals (0-1)",
90
90
  )
91
+ @click.option(
92
+ "--snv-gq-only",
93
+ is_flag=True,
94
+ default=False,
95
+ show_default=True,
96
+ help="Apply GQ threshold only to SNV variants",
97
+ )
98
+ @click.option(
99
+ "--ignore-gq-if-unset",
100
+ is_flag=True,
101
+ default=False,
102
+ show_default=True,
103
+ help="Ignore GQ threshold if GQ (or the QUAL field for --qual-gq) is unset in VCF",
104
+ )
91
105
  @click.pass_context
92
106
  def load(
93
107
  ctx,
@@ -104,6 +118,8 @@ def load(
104
118
  hard_threshold,
105
119
  soft_threshold,
106
120
  qual_gq,
121
+ snv_gq_only,
122
+ ignore_gq_if_unset,
107
123
  ):
108
124
  """Load the variants of a case
109
125
 
@@ -145,12 +161,14 @@ def load(
145
161
  skip_case_id=skip_case_id,
146
162
  case_id=case_id,
147
163
  gq_threshold=gq_threshold,
164
+ snv_gq_only=snv_gq_only,
148
165
  qual_gq=qual_gq,
149
166
  max_window=max_window,
150
167
  profile_file=variant_profile_path,
151
168
  hard_threshold=hard_threshold,
152
169
  soft_threshold=soft_threshold,
153
170
  genome_build=genome_build,
171
+ ignore_gq_if_unset=ignore_gq_if_unset,
154
172
  )
155
173
  except (SyntaxError, CaseError, IOError) as error:
156
174
  LOG.warning(error)
loqusdb/utils/load.py CHANGED
@@ -31,6 +31,7 @@ def load_database(
31
31
  family_type="ped",
32
32
  skip_case_id=False,
33
33
  gq_threshold=None,
34
+ snv_gq_only=False,
34
35
  qual_gq=False,
35
36
  case_id=None,
36
37
  max_window=3000,
@@ -38,6 +39,7 @@ def load_database(
38
39
  hard_threshold=0.95,
39
40
  soft_threshold=0.9,
40
41
  genome_build=None,
42
+ ignore_gq_if_unset=False,
41
43
  ):
42
44
  """Load the database with a case and its variants
43
45
 
@@ -55,6 +57,8 @@ def load_database(
55
57
  check_profile(bool): Does profile check if True
56
58
  hard_threshold(float): Rejects load if hamming distance above this is found
57
59
  soft_threshold(float): Stores similar samples if hamming distance above this is found
60
+ genome_build(str): Store the genome version
61
+ ignore_gq_if_unset(bool): Ignore the GQ threshold check for variants that do not have a GQ or QUAL set
58
62
 
59
63
  Returns:
60
64
  nr_inserted(int)
@@ -88,11 +92,13 @@ def load_database(
88
92
  adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
89
93
  )
90
94
 
91
- # If a gq threshold is used the variants needs to have GQ
95
+ # If a gq threshold is used the variants need to have GQ (only SNVs if snv_gq_only)
92
96
  for _vcf_file in vcf_files:
93
- # Get a cyvcf2.VCF object
94
- vcf = get_vcf(_vcf_file)
97
+ is_sv = _vcf_file == sv_file
98
+ if snv_gq_only and is_sv:
99
+ continue # skip GQ check for SV VCF
95
100
 
101
+ vcf = get_vcf(_vcf_file)
96
102
  if gq_threshold and not vcf.contains("GQ") and not qual_gq:
97
103
  LOG.warning("Set gq-threshold to 0 or add info to vcf {0}".format(_vcf_file))
98
104
  raise SyntaxError("GQ is not defined in vcf header")
@@ -144,11 +150,12 @@ def load_database(
144
150
  vcf_obj=vcf_obj,
145
151
  case_obj=case_obj,
146
152
  skip_case_id=skip_case_id,
147
- gq_threshold=gq_threshold,
153
+ gq_threshold=gq_threshold if not snv_gq_only or variant_type == "snv" else None,
148
154
  qual_gq=qual_gq,
149
155
  max_window=max_window,
150
156
  variant_type=variant_type,
151
157
  genome_build=genome_build,
158
+ ignore_gq_if_unset=ignore_gq_if_unset,
152
159
  )
153
160
  except Exception as err:
154
161
  # If something went wrong do a rollback
@@ -196,18 +203,22 @@ def load_variants(
196
203
  max_window=3000,
197
204
  variant_type="snv",
198
205
  genome_build=None,
206
+ ignore_gq_if_unset=False,
199
207
  ):
200
208
  """Load variants for a family into the database.
201
209
 
202
210
  Args:
203
211
  adapter (loqusdb.plugins.Adapter): initialized plugin
212
+ vcf_obj(cyvcf2.VCF): Iterable with cyvcf2.Variant
204
213
  case_obj(Case): dict with case information
205
- nr_variants(int)
206
214
  skip_case_id (bool): whether to include the case id on variant level
207
215
  or not
208
216
  gq_threshold(int)
217
+ qual_gq(bool): whether to use QUAL instead of GQ
209
218
  max_window(int): Specify the max size for sv windows
210
219
  variant_type(str): 'sv' or 'snv'
220
+ genome_build(str): Genome version. Ex. GRCh37
221
+ ignore_gq_if_unset (bool): whether to add entries that have missing GQ or QUAL field
211
222
 
212
223
  Returns:
213
224
  nr_inserted(int)
@@ -226,7 +237,13 @@ def load_variants(
226
237
 
227
238
  variants = (
228
239
  build_variant(
229
- variant, case_obj, case_id, gq_threshold, qual_gq, genome_build=genome_build
240
+ variant,
241
+ case_obj,
242
+ case_id,
243
+ gq_threshold,
244
+ qual_gq,
245
+ ignore_gq_if_unset,
246
+ genome_build=genome_build,
230
247
  )
231
248
  for variant in bar
232
249
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: loqusdb
3
- Version: 2.7.16
3
+ Version: 2.7.18
4
4
  Summary: A simple observation count database
5
5
  License: MIT
6
6
  Author: Your Name
@@ -1,16 +1,16 @@
1
- loqusdb/__init__.py,sha256=D1R2iXsppIzZJxfYNMSdRlrzsMKf-4z-tSMYAFWtEPw,1688
1
+ loqusdb/__init__.py,sha256=r4jbFPCtsyXv8VehXkXxBx1HDPPeaidp1mmwpN_Ghrg,1688
2
2
  loqusdb/__main__.py,sha256=8FGKySAGaWSzAYMj6HRsxeyiME3V01Idt7HrmN7pSYY,397
3
3
  loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  loqusdb/build_models/case.py,sha256=AByutEYK2N3kS9JFvyZfPKNZdCpZHCSD0nNHAgaU1Cs,4127
5
5
  loqusdb/build_models/profile_variant.py,sha256=TbSxfVjESstS_FgbkOW4NQwMQVeTyhn9oc9yPZmDhzI,1021
6
- loqusdb/build_models/variant.py,sha256=2ate8viYhHK1yX7UvCzEPgfuuXwTgAApU_2E5jl1ZO4,6934
6
+ loqusdb/build_models/variant.py,sha256=Vee9FVwCC03wo6u2uI7qH9rgfevndBf364YSmE3Js2k,7271
7
7
  loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
8
8
  loqusdb/commands/annotate.py,sha256=748kImopE5WbaO1nuv3WUgIqezWFSsi7SBeWhOz26-s,1384
9
9
  loqusdb/commands/cli.py,sha256=wJD5S1BoCxtRTAobd1QtmQpzlngJg-mt1nsyD92fDD4,3176
10
10
  loqusdb/commands/delete.py,sha256=R6ysHKSMw1mmL4ZbktoUIKzdzDLQ3314YPYhIy1myic,1979
11
11
  loqusdb/commands/export.py,sha256=HKoRzUo_BHNOdw_TcKUId9TTowi8VJVGqnuDlK-FqFE,3531
12
12
  loqusdb/commands/identity.py,sha256=KLA9c8e6cJFDxtqIa1G6zdHTHK1sz2b3v1Utdtik_4k,787
13
- loqusdb/commands/load.py,sha256=sDGCOGbukdfebLn1iBGyWoI6t3tijLzywP1tg3WheQI,4431
13
+ loqusdb/commands/load.py,sha256=QMUSzdaCi1lVC9F4u0xkAJtVz4N_tMPh3Nu6Obl9ky4,4903
14
14
  loqusdb/commands/load_profile.py,sha256=cflCbF9f77_HCH8xPnN8zSSocvIffRMnC2LPE0j7Xq8,3336
15
15
  loqusdb/commands/migrate.py,sha256=2C8YL-zVqnpnqg3JIyUr0rbVnb8-AGPVWNhicHnPKLo,667
16
16
  loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1492
@@ -43,14 +43,14 @@ loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
43
  loqusdb/utils/annotate.py,sha256=cPNWlhsv6yoe3lxNfa9DytO5eACuM_mOJJw_mglVMN0,2646
44
44
  loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
45
45
  loqusdb/utils/delete.py,sha256=-ddBM_QXKzlUN6egEJggKzXX1P-WEdi92HgaD1DJRtg,4843
46
- loqusdb/utils/load.py,sha256=ZdsnmHTVGOjwG24Fm5gcUu7tcLwYSqvh3W9j6ixqyF0,8239
46
+ loqusdb/utils/load.py,sha256=LyYHPwaCVV-7edbaoPahah05yPfG9H4VCYSg1gmz_M0,9063
47
47
  loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
48
48
  loqusdb/utils/profiling.py,sha256=3OizF7CpYvSl9kyl2g4KGJxbIRUqWfmfLxn3843XYDk,9164
49
49
  loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
50
50
  loqusdb/utils/variant.py,sha256=U6nMZRUf5NDDQ74nG0HBCLMnFQVgFAT6eHll_F2uiwc,2087
51
51
  loqusdb/utils/vcf.py,sha256=ybmrTBEPYa0FbUXo8ttlwATk13RnKjX9eIDbRDwCiVE,5175
52
- loqusdb-2.7.16.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
- loqusdb-2.7.16.dist-info/METADATA,sha256=Aayv73fzmF_pCfXHNoRZEzen9CbZqZccc5pfwzMXaCE,5321
54
- loqusdb-2.7.16.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
55
- loqusdb-2.7.16.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
- loqusdb-2.7.16.dist-info/RECORD,,
52
+ loqusdb-2.7.18.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
+ loqusdb-2.7.18.dist-info/METADATA,sha256=yTsm5y8i4u_chGfnAbMm5ZfjorJOVZQfl4qK9vdJDCk,5321
54
+ loqusdb-2.7.18.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
+ loqusdb-2.7.18.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
+ loqusdb-2.7.18.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any