loqusdb 2.7.19__py3-none-any.whl → 2.7.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loqusdb/__init__.py CHANGED
@@ -4,7 +4,7 @@ from pymongo import ASCENDING, IndexModel
4
4
 
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
- __version__ = "2.7.19"
7
+ __version__ = "2.7.20"
8
8
 
9
9
  INDEXES = {
10
10
  "variant": [
@@ -61,32 +61,3 @@ INDEXES = {
61
61
  ),
62
62
  ],
63
63
  }
64
-
65
- CHROMOSOME_ORDER = (
66
- "1",
67
- "2",
68
- "3",
69
- "4",
70
- "5",
71
- "6",
72
- "7",
73
- "8",
74
- "9",
75
- "10",
76
- "11",
77
- "12",
78
- "13",
79
- "14",
80
- "15",
81
- "16",
82
- "17",
83
- "18",
84
- "19",
85
- "20",
86
- "21",
87
- "22",
88
- "23",
89
- "X",
90
- "Y",
91
- "MT",
92
- )
@@ -15,18 +15,17 @@ Position = namedtuple("Position", "chrom pos")
15
15
  # These are coordinate for the pseudo autosomal regions in GRCh37
16
16
 
17
17
 
18
- def check_par(chrom, pos, genome_build=None):
18
+ def check_par(chrom, pos, genome_build):
19
19
  """Check if a coordinate is in the PAR region
20
20
 
21
21
  Args:
22
22
  chrom(str)
23
23
  pos(int)
24
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
24
25
 
25
26
  Returns:
26
27
  par(bool)
27
28
  """
28
- if genome_build is None:
29
- genome_build = GRCH37
30
29
  return any(
31
30
  pos >= interval[0] and pos <= interval[1] for interval in PAR[genome_build].get(chrom, [])
32
31
  )
@@ -50,24 +49,25 @@ def get_variant_id(variant, keep_chr_prefix=None):
50
49
  return "_".join([str(chrom), str(variant.POS), str(variant.REF), str(variant.ALT[0])])
51
50
 
52
51
 
53
- def is_greater(a, b):
52
+ def is_greater(a, b, genome_build):
54
53
  """Check if position a is greater than position b
55
54
  This will look at chromosome and position.
56
55
 
57
56
  For example a position where chrom = 2 and pos = 300 is greater than a position where
58
57
  chrom = 1 and pos = 1000
59
58
 
60
- If any of the chromosomes is outside [1-22,X,Y,MT] we can not say which is biggest.
59
+ If any of the chromosomes is outside [1-22,X,Y,MT] or [chr1-chr22,chrX,chrY,chrM] we can not say which is biggest.
61
60
 
62
61
  Args:
63
62
  a,b(Position)
63
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
64
64
 
65
65
  Returns:
66
66
  bool: True if a is greater than b
67
67
  """
68
68
 
69
- a_chrom = CHROM_TO_INT.get(a.chrom, 0)
70
- b_chrom = CHROM_TO_INT.get(b.chrom, 0)
69
+ a_chrom = CHROM_TO_INT[genome_build].get(a.chrom, 0)
70
+ b_chrom = CHROM_TO_INT[genome_build].get(b.chrom, 0)
71
71
 
72
72
  if a_chrom == 0 or b_chrom == 0:
73
73
  return False
@@ -78,12 +78,13 @@ def is_greater(a, b):
78
78
  return a_chrom == b_chrom and a.pos > b.pos
79
79
 
80
80
 
81
- def get_coords(variant, keep_chr_prefix):
81
+ def get_coords(variant, keep_chr_prefix, genome_build):
82
82
  """Returns a dictionary with position information
83
83
 
84
84
  Args:
85
85
  variant(cyvcf2.Variant)
86
86
  keep_chr_prefix(bool): Retain chr/CHR/Chr prefix when present
87
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
87
88
 
88
89
  Returns:
89
90
  coordinates(dict)
@@ -139,7 +140,7 @@ def get_coords(variant, keep_chr_prefix):
139
140
  end_position = Position(end_chrom, end)
140
141
 
141
142
  # If 'start' is greater than 'end', switch positions
142
- if is_greater(position, end_position):
143
+ if is_greater(position, end_position, genome_build=genome_build):
143
144
  end_chrom = position.chrom
144
145
  end = position.pos
145
146
 
@@ -201,7 +202,7 @@ def build_variant(
201
202
  # We allways assume splitted and normalized VCFs
202
203
  alt = variant.ALT[0]
203
204
 
204
- coordinates = get_coords(variant, keep_chr_prefix)
205
+ coordinates = get_coords(variant, keep_chr_prefix, genome_build=genome_build)
205
206
  chrom = coordinates["chrom"]
206
207
  pos = coordinates["pos"]
207
208
 
@@ -239,7 +240,7 @@ def build_variant(
239
240
  # If variant in X or Y and individual is male,
240
241
  # we need to check hemizygosity
241
242
  if (
242
- chrom in ["X", "Y"]
243
+ chrom in ["X", "Y", "chrX", "chrY"]
243
244
  and ind_obj["sex"] == 1
244
245
  and not check_par(chrom, pos, genome_build=genome_build)
245
246
  ):
loqusdb/commands/cli.py CHANGED
@@ -52,6 +52,8 @@ LOG = logging.getLogger(__name__)
52
52
  @click.option(
53
53
  "-g",
54
54
  "--genome-build",
55
+ default="GRCh37",
56
+ show_default=True,
55
57
  type=click.Choice([GRCH37, GRCH38]),
56
58
  help="Specify what genome build to use",
57
59
  )
@@ -121,7 +123,7 @@ def cli(
121
123
 
122
124
  adapter = MongoAdapter(client, db_name=database)
123
125
 
124
- genome_build = genome_build or configs.get("genome_build") or GRCH37
126
+ genome_build = genome_build or configs.get("genome_build")
125
127
  keep_chr_prefix = keep_chr_prefix or configs.get("keep_chr_prefix")
126
128
 
127
129
  ctx.obj = {}
@@ -2,7 +2,7 @@ import logging
2
2
  from datetime import datetime
3
3
 
4
4
  import click
5
- from loqusdb import CHROMOSOME_ORDER
5
+ from loqusdb.constants import CHROMOSOMES, GRCH37, GRCH38
6
6
  from loqusdb.utils.variant import format_variant
7
7
  from vcftoolbox import HeaderParser, print_headers, print_variant
8
8
 
@@ -43,11 +43,22 @@ def export(ctx, outfile, variant_type, freq):
43
43
  is_sv = variant_type == "sv"
44
44
  existing_chromosomes = set(adapter.get_chromosomes(sv=is_sv))
45
45
 
46
+ genome = ctx.obj["genome_build"]
47
+ chromosome_order = CHROMOSOMES[genome]
48
+ keep_chr_prefix = ctx.obj["keep_chr_prefix"]
49
+
46
50
  ordered_chromosomes = []
47
- for chrom in CHROMOSOME_ORDER:
48
- if chrom in existing_chromosomes:
51
+ for chrom in chromosome_order:
52
+ if keep_chr_prefix and chrom in existing_chromosomes:
49
53
  ordered_chromosomes.append(chrom)
50
54
  existing_chromosomes.remove(chrom)
55
+ elif not keep_chr_prefix:
56
+ if genome == GRCH37 and chrom in existing_chromosomes:
57
+ ordered_chromosomes.append(chrom)
58
+ existing_chromosomes.remove(chrom)
59
+ elif genome == GRCH38 and chrom[3:] in existing_chromosomes:
60
+ ordered_chromosomes.append(chrom)
61
+ existing_chromosomes.remove(chrom)
51
62
  for chrom in existing_chromosomes:
52
63
  ordered_chromosomes.append(chrom)
53
64
 
@@ -17,35 +17,67 @@ PAR = {
17
17
  GENOTYPE_MAP = {0: "hom_ref", 1: "het", 2: "no_call", 3: "hom_alt"}
18
18
 
19
19
  # To keep the order of chromosomes
20
- CHROMOSOMES = (
21
- "1",
22
- "2",
23
- "3",
24
- "4",
25
- "5",
26
- "6",
27
- "7",
28
- "8",
29
- "9",
30
- "10",
31
- "11",
32
- "12",
33
- "13",
34
- "14",
35
- "15",
36
- "16",
37
- "17",
38
- "18",
39
- "19",
40
- "20",
41
- "21",
42
- "22",
43
- "X",
44
- "Y",
45
- "MT",
46
- )
20
+ CHROMOSOMES = {
21
+ GRCH37: [
22
+ "1",
23
+ "2",
24
+ "3",
25
+ "4",
26
+ "5",
27
+ "6",
28
+ "7",
29
+ "8",
30
+ "9",
31
+ "10",
32
+ "11",
33
+ "12",
34
+ "13",
35
+ "14",
36
+ "15",
37
+ "16",
38
+ "17",
39
+ "18",
40
+ "19",
41
+ "20",
42
+ "21",
43
+ "22",
44
+ "X",
45
+ "Y",
46
+ "MT",
47
+ ],
48
+ GRCH38: [
49
+ "chr1",
50
+ "chr2",
51
+ "chr3",
52
+ "chr4",
53
+ "chr5",
54
+ "chr6",
55
+ "chr7",
56
+ "chr8",
57
+ "chr9",
58
+ "chr10",
59
+ "chr11",
60
+ "chr12",
61
+ "chr13",
62
+ "chr14",
63
+ "chr15",
64
+ "chr16",
65
+ "chr17",
66
+ "chr18",
67
+ "chr19",
68
+ "chr20",
69
+ "chr21",
70
+ "chr22",
71
+ "chrX",
72
+ "chrY",
73
+ "chrM",
74
+ ],
75
+ }
47
76
 
48
- CHROM_TO_INT = {chrom: i + 1 for i, chrom in enumerate(CHROMOSOMES)}
77
+ CHROM_TO_INT = {
78
+ build: {chrom: i + 1 for i, chrom in enumerate(chromosomes)}
79
+ for build, chromosomes in CHROMOSOMES.items()
80
+ }
49
81
 
50
82
  # Ranges of hamming distances to be checked when 'loqusdb profile --stats'
51
83
  # items: <range_name>: <range>
loqusdb/utils/delete.py CHANGED
@@ -21,6 +21,7 @@ def delete(
21
21
  existing_case(models.Case): If something failed during an update we need to revert
22
22
  to the original case
23
23
  keep_chr_prefix(bool): Retain chr/CHR/Chr prefixes in chromosome IDs when they are present
24
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
24
25
 
25
26
  """
26
27
  # This will overwrite the updated case with the previous one
@@ -48,7 +49,11 @@ def delete(
48
49
  elif file_type == "vcf_sv_path":
49
50
  LOG.info("deleting structural variants")
50
51
  delete_structural_variants(
51
- adapter=adapter, vcf_obj=vcf_obj, case_obj=case_obj, keep_chr_prefix=keep_chr_prefix
52
+ adapter=adapter,
53
+ vcf_obj=vcf_obj,
54
+ case_obj=case_obj,
55
+ keep_chr_prefix=keep_chr_prefix,
56
+ genome_build=genome_build,
52
57
  )
53
58
 
54
59
 
@@ -62,6 +67,7 @@ def delete_variants(
62
67
  vcf_obj(iterable(dict))
63
68
  ind_positions(dict)
64
69
  case_id(str)
70
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
65
71
 
66
72
  Returns:
67
73
  nr_deleted (int): Number of deleted variants
@@ -120,7 +126,9 @@ def delete_variants(
120
126
  return nr_deleted
121
127
 
122
128
 
123
- def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None, case_id=None):
129
+ def delete_structural_variants(
130
+ adapter, vcf_obj, case_obj, genome_build, keep_chr_prefix=None, case_id=None
131
+ ):
124
132
  """Delete structural variants for a case in the database
125
133
 
126
134
  Args:
@@ -128,6 +136,7 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None,
128
136
  vcf_obj(iterable(dict))
129
137
  ind_positions(dict)
130
138
  case_id(str)
139
+ genome_build(str): Genome build. Ex. GRCh37 or GRCh38
131
140
 
132
141
  Returns:
133
142
  nr_deleted (int): Number of deleted variants"""
@@ -141,7 +150,11 @@ def delete_structural_variants(adapter, vcf_obj, case_obj, keep_chr_prefix=None,
141
150
 
142
151
  for variant in vcf_obj:
143
152
  formated_variant = build_variant(
144
- variant=variant, case_obj=case_obj, case_id=case_id, keep_chr_prefix=keep_chr_prefix
153
+ variant=variant,
154
+ case_obj=case_obj,
155
+ case_id=case_id,
156
+ genome_build=genome_build,
157
+ keep_chr_prefix=keep_chr_prefix,
145
158
  )
146
159
 
147
160
  if not formated_variant:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: loqusdb
3
- Version: 2.7.19
3
+ Version: 2.7.20
4
4
  Summary: A simple observation count database
5
5
  License: MIT
6
6
  Author: Your Name
@@ -1,14 +1,14 @@
1
- loqusdb/__init__.py,sha256=Zs9AtDiQwuASVgXDU0xzuWv8RhaadjMaa9WD4D7BMVc,1688
1
+ loqusdb/__init__.py,sha256=HCx0k7_ndzjGnRnnEFruQxC4eQlKosO4gSJ2TP86I1g,1415
2
2
  loqusdb/__main__.py,sha256=8FGKySAGaWSzAYMj6HRsxeyiME3V01Idt7HrmN7pSYY,397
3
3
  loqusdb/build_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  loqusdb/build_models/case.py,sha256=AByutEYK2N3kS9JFvyZfPKNZdCpZHCSD0nNHAgaU1Cs,4127
5
5
  loqusdb/build_models/profile_variant.py,sha256=WzWhxq4HNvf67IknyBWYnMHQzPMZ9eitw_so6lfOkPc,1166
6
- loqusdb/build_models/variant.py,sha256=buIQr8GsNUBBtgf78a0n5I_GiMEogohSEQJibVUuM5Y,7815
6
+ loqusdb/build_models/variant.py,sha256=4_FDSRUGDt-h3lqBJ00kC9QDdTTGzpbEF-s51AjLQLU,8094
7
7
  loqusdb/commands/__init__.py,sha256=BXAN3UADgqPrkGczzjlLO9GyyQ96dnLnP7n92JlYHgo,603
8
8
  loqusdb/commands/annotate.py,sha256=MGU9EerKYsFx1lkyjQ6ZMUKYuShi0uSTPJCS0cyxq7U,1467
9
- loqusdb/commands/cli.py,sha256=XRprLQaENiLdqXG_7ugCC9jTcG7Uh54_M0KZj1ERFaM,3542
9
+ loqusdb/commands/cli.py,sha256=lRgOYN3JDE81z3EUdqc5-eU7i0m5XF8p0WZvI3svQ3g,3577
10
10
  loqusdb/commands/delete.py,sha256=BRtm6Uade3l97FBcKFNkiYjks84AhuXYo-2QD8E74A4,2120
11
- loqusdb/commands/export.py,sha256=HKoRzUo_BHNOdw_TcKUId9TTowi8VJVGqnuDlK-FqFE,3531
11
+ loqusdb/commands/export.py,sha256=3eV0pYfkD73qoB0_Y2_wpxV0xOrCAppVoPs9C815zyI,4078
12
12
  loqusdb/commands/identity.py,sha256=KLA9c8e6cJFDxtqIa1G6zdHTHK1sz2b3v1Utdtik_4k,787
13
13
  loqusdb/commands/load.py,sha256=pHtjldblUM-HFFgcN5UtoaxGhYmo1yeexqGq4I427qk,4996
14
14
  loqusdb/commands/load_profile.py,sha256=x-T2bzi2SL5kwZhY_3hHQCtGDLao1xkxj1pZaOnzs4U,3436
@@ -17,7 +17,7 @@ loqusdb/commands/restore.py,sha256=eqPX0yao0IAYS5SbjCdlsfSJRBbRByBLISUU2hTzqqs,1
17
17
  loqusdb/commands/update.py,sha256=zz3wueaJVqJ1FKact-rpY2az__5oa1LnZKf7mgqNGPk,3211
18
18
  loqusdb/commands/view.py,sha256=PkwyvzQgq5ArrEakI-lKQThrhjBLLl2gYejHI2g13WU,5197
19
19
  loqusdb/commands/wipe.py,sha256=WTOjyNooCUhtmZ6pdcPFa0PZrFc9E_pkLbnat_zP96M,553
20
- loqusdb/constants/__init__.py,sha256=r6y2TN8BqbKuh2Uyxq0trh-3A9xiWeStqWlvEPp-rSA,1645
20
+ loqusdb/constants/__init__.py,sha256=BpZQYpUF-B9AqoXE2R5XMFuURxH5iNLt1kUJSKMqZGY,2265
21
21
  loqusdb/exceptions/__init__.py,sha256=Fq0UQg9TepWh19D7WT3dARyAHvorwJF6phhnZi2AkxE,88
22
22
  loqusdb/exceptions/case.py,sha256=n3mGF7RIc1imQFxnNJ1TWxeJeMWN4MHsKxoZb0m1-Os,92
23
23
  loqusdb/exceptions/profile.py,sha256=TVkRXh3ZbkNCmFHzZTCuhPP3iFWBwP1YQGD8IlSoCTo,98
@@ -42,15 +42,15 @@ loqusdb/resources/maf_50_sites_GRCh38.vcf.gz,sha256=6T4iyrIr6yx1HpgobzAsh305BO1J
42
42
  loqusdb/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
43
  loqusdb/utils/annotate.py,sha256=vOHlLkenwCCLXh-cjerd9cW68eZfEtgvP0IwWh-oBHs,2347
44
44
  loqusdb/utils/case.py,sha256=aeTvyACJTDjzl-aOjAZaUzFMLisgFKMfcoXSvNAZz4s,2168
45
- loqusdb/utils/delete.py,sha256=uj1m5i12GjUhhnCnIbh6D7BMG-oMDk6bfrJxk8zpSxE,5208
45
+ loqusdb/utils/delete.py,sha256=X2SA0IdynoLcV-LwSg_Y_jqh-1uDRE6EoaVTiCWSPPM,5582
46
46
  loqusdb/utils/load.py,sha256=GgJyTLSOpgcEqjvo9RXzcacQLzHZYtXF_tkyp_XJwOs,9448
47
47
  loqusdb/utils/migrate.py,sha256=9Q6kdIi9TpFVzDYptlEE8RqPPS5wyzfM3F8egzmmBBk,1113
48
48
  loqusdb/utils/profiling.py,sha256=uISq4xfRNPPedoYXS_D4dXphq8odDogfMBm_XfHBTpE,9232
49
49
  loqusdb/utils/update.py,sha256=1edJG-u24FgOSxyXAQEiyTG4IyK-Uo3lSIl5qyzcXsI,4433
50
50
  loqusdb/utils/variant.py,sha256=U6nMZRUf5NDDQ74nG0HBCLMnFQVgFAT6eHll_F2uiwc,2087
51
51
  loqusdb/utils/vcf.py,sha256=og8JBYock31v_0CnsoRhuKIJCurLCIFW8PCCQIRWF-Q,5207
52
- loqusdb-2.7.19.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
- loqusdb-2.7.19.dist-info/METADATA,sha256=w1TxeA5Lz1cYqetHRw0UGvuEUS82WfpOgxThm5hQOYs,5321
54
- loqusdb-2.7.19.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
- loqusdb-2.7.19.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
- loqusdb-2.7.19.dist-info/RECORD,,
52
+ loqusdb-2.7.20.dist-info/LICENSE,sha256=urpFcJXw3elN9kV2fFutc-lXegjuu2lqP_GSy8_CAbs,1054
53
+ loqusdb-2.7.20.dist-info/METADATA,sha256=efArcCfHQNeSsFf5C2EPYII5cYFYTf80nmIIpLLFanM,5321
54
+ loqusdb-2.7.20.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
55
+ loqusdb-2.7.20.dist-info/entry_points.txt,sha256=wFoWzEFjsSgXkj9FMQA8C9ihZoJ9R1XvbGuX9hEEI6E,52
56
+ loqusdb-2.7.20.dist-info/RECORD,,