cool-seq-tool 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cool_seq_tool/__init__.py CHANGED
@@ -1,12 +1,10 @@
1
1
  """The cool_seq_tool package"""
2
2
 
3
- import logging
3
+ from importlib.metadata import PackageNotFoundError, version
4
4
 
5
- logging.basicConfig(
6
- filename="cool_seq_tool.log",
7
- format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
8
- )
9
- logger = logging.getLogger("cool_seq_tool")
10
- logger.setLevel(logging.DEBUG)
11
-
12
- LOG_FN = "cool_seq_tool.log"
5
+ try:
6
+ __version__ = version("cool_seq_tool")
7
+ except PackageNotFoundError:
8
+ __version__ = "unknown"
9
+ finally:
10
+ del version, PackageNotFoundError
cool_seq_tool/app.py CHANGED
@@ -11,13 +11,14 @@ from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAcces
11
11
  from cool_seq_tool.mappers import (
12
12
  AlignmentMapper,
13
13
  ExonGenomicCoordsMapper,
14
+ LiftOver,
14
15
  ManeTranscript,
15
16
  )
16
17
  from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
17
18
  from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
18
19
  from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
19
20
 
20
- logger = logging.getLogger(__name__)
21
+ _logger = logging.getLogger(__name__)
21
22
 
22
23
 
23
24
  class CoolSeqTool:
@@ -29,6 +30,7 @@ class CoolSeqTool:
29
30
  * ``self.mane_transcript_mappings``: :py:class:`ManeTranscriptMappings <cool_seq_tool.sources.mane_transcript_mappings.ManeTranscriptMappings>`
30
31
  * ``self.uta_db``: :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>`
31
32
  * ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
33
+ * ``self.liftover``: :py:class:`LiftOver <cool_seq_tool.mappers.liftover.LiftOver>`
32
34
  * ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
33
35
  * ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
34
36
  """
@@ -94,15 +96,18 @@ class CoolSeqTool:
94
96
  self.alignment_mapper = AlignmentMapper(
95
97
  self.seqrepo_access, self.transcript_mappings, self.uta_db
96
98
  )
99
+ self.liftover = LiftOver()
97
100
  self.mane_transcript = ManeTranscript(
98
101
  self.seqrepo_access,
99
102
  self.transcript_mappings,
100
103
  self.mane_transcript_mappings,
101
104
  self.uta_db,
105
+ self.liftover,
102
106
  )
103
107
  self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
104
108
  self.seqrepo_access,
105
109
  self.uta_db,
106
110
  self.mane_transcript,
107
111
  self.mane_transcript_mappings,
112
+ self.liftover,
108
113
  )
@@ -8,10 +8,10 @@ from pathlib import Path
8
8
 
9
9
  from ga4gh.vrs.dataproxy import SeqRepoDataProxy
10
10
 
11
- from cool_seq_tool.schemas import ResidueMode
12
- from cool_seq_tool.utils import get_inter_residue_pos
11
+ from cool_seq_tool.schemas import Assembly, ResidueMode
12
+ from cool_seq_tool.utils import get_inter_residue_pos, process_chromosome_input
13
13
 
14
- logger = logging.getLogger(__name__)
14
+ _logger = logging.getLogger(__name__)
15
15
 
16
16
 
17
17
  SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
@@ -66,7 +66,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
66
66
  sequence = self.sr.fetch(ac, start=start, end=end)
67
67
  except KeyError:
68
68
  msg = f"Accession, {ac}, not found in SeqRepo"
69
- logger.warning(msg)
69
+ _logger.warning(msg)
70
70
  return "", msg
71
71
  except ValueError as e:
72
72
  error = str(e)
@@ -80,7 +80,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
80
80
  )
81
81
  else:
82
82
  msg = f"{e}"
83
- logger.warning(msg)
83
+ _logger.warning(msg)
84
84
  return "", msg
85
85
  else:
86
86
  # If start is valid, but end is invalid, SeqRepo still returns
@@ -118,7 +118,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
118
118
  )
119
119
  except KeyError:
120
120
  msg = f"SeqRepo unable to get translated identifiers for {ac}"
121
- logger.warning(msg)
121
+ _logger.warning(msg)
122
122
  return [], msg
123
123
  else:
124
124
  return ga4gh_identifiers, None
@@ -133,7 +133,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
133
133
  return self.sr.translate_alias(input_str), None
134
134
  except KeyError:
135
135
  msg = f"SeqRepo could not translate alias {input_str}"
136
- logger.warning(msg)
136
+ _logger.warning(msg)
137
137
  return [], msg
138
138
 
139
139
  def chromosome_to_acs(self, chromosome: str) -> tuple[list[str] | None, str | None]:
@@ -143,14 +143,18 @@ class SeqRepoAccess(SeqRepoDataProxy):
143
143
  :return: Accessions for chromosome (ordered by latest assembly)
144
144
  """
145
145
  acs = []
146
- for assembly in ["GRCh38", "GRCh37"]:
146
+ for assembly in reversed(Assembly.values()):
147
147
  tmp_acs, _ = self.translate_identifier(
148
- f"{assembly}:chr{chromosome}", target_namespaces="refseq"
148
+ f"{assembly}:{process_chromosome_input(chromosome)}",
149
+ target_namespaces="refseq",
149
150
  )
150
151
  acs += [ac.split("refseq:")[-1] for ac in tmp_acs]
151
152
  if acs:
152
153
  return acs, None
153
- return None, f"{chromosome} is not a valid chromosome"
154
+ return (
155
+ None,
156
+ f'Unable to find matching accessions for "{chromosome}" in SeqRepo.',
157
+ )
154
158
 
155
159
  def ac_to_chromosome(self, ac: str) -> tuple[str | None, str | None]:
156
160
  """Get chromosome for accession.
@@ -1,8 +1,9 @@
1
1
  """Module for mapping data"""
2
2
 
3
3
  from .alignment import AlignmentMapper # noqa: I001
4
+ from .liftover import LiftOver
4
5
  from .mane_transcript import ManeTranscript
5
6
  from .exon_genomic_coords import ExonGenomicCoordsMapper
6
7
 
7
8
 
8
- __all__ = ["AlignmentMapper", "ManeTranscript", "ExonGenomicCoordsMapper"]
9
+ __all__ = ["AlignmentMapper", "LiftOver", "ManeTranscript", "ExonGenomicCoordsMapper"]
@@ -4,6 +4,7 @@ import logging
4
4
  from typing import Literal, TypeVar
5
5
 
6
6
  from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
7
+ from cool_seq_tool.mappers.liftover import LiftOver
7
8
  from cool_seq_tool.mappers.mane_transcript import CdnaRepresentation, ManeTranscript
8
9
  from cool_seq_tool.schemas import (
9
10
  AnnotationLayer,
@@ -23,7 +24,7 @@ CoordinatesResponseType = TypeVar(
23
24
  "CoordinatesResponseType", GenomicDataResponse, TranscriptExonDataResponse
24
25
  )
25
26
 
26
- logger = logging.getLogger(__name__)
27
+ _logger = logging.getLogger(__name__)
27
28
 
28
29
 
29
30
  class ExonGenomicCoordsMapper:
@@ -37,6 +38,7 @@ class ExonGenomicCoordsMapper:
37
38
  uta_db: UtaDatabase,
38
39
  mane_transcript: ManeTranscript,
39
40
  mane_transcript_mappings: ManeTranscriptMappings,
41
+ liftover: LiftOver,
40
42
  ) -> None:
41
43
  """Initialize ExonGenomicCoordsMapper class.
42
44
 
@@ -63,25 +65,28 @@ class ExonGenomicCoordsMapper:
63
65
  :param uta_db: UtaDatabase instance to give access to query UTA database
64
66
  :param mane_transcript: Instance to align to MANE or compatible representation
65
67
  :param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
68
+ :param liftover: Instance to provide mapping between human genome assemblies
66
69
  """
67
70
  self.seqrepo_access = seqrepo_access
68
71
  self.uta_db = uta_db
69
72
  self.mane_transcript = mane_transcript
70
73
  self.mane_transcript_mappings = mane_transcript_mappings
74
+ self.liftover = liftover
71
75
 
72
76
  @staticmethod
73
77
  def _return_warnings(
74
- resp: CoordinatesResponseType, warning_msg: str
78
+ resp: CoordinatesResponseType, warning_msg: list[str]
75
79
  ) -> CoordinatesResponseType:
76
80
  """Add warnings to response object
77
81
 
78
82
  :param resp: Response object
79
- :param warning_msg: Warning message on why ``transcript_exon_data`` or
83
+ :param warning_msg: Warning message(s) on why ``transcript_exon_data`` or
80
84
  ``genomic_data`` field is ``None``
81
85
  :return: Response object with warning message
82
86
  """
83
- logger.warning(warning_msg)
84
- resp.warnings.append(warning_msg)
87
+ for msg in warning_msg:
88
+ _logger.warning(msg)
89
+ resp.warnings.append(msg)
85
90
  return resp
86
91
 
87
92
  async def transcript_to_genomic_coordinates(
@@ -126,42 +131,44 @@ class ExonGenomicCoordsMapper:
126
131
  )
127
132
 
128
133
  # Ensure valid inputs
134
+ warnings = []
129
135
  if not transcript:
130
- return self._return_warnings(resp, "Must provide `transcript`")
131
- transcript = transcript.strip()
136
+ warnings.append("Must provide `transcript`")
137
+ else:
138
+ transcript = transcript.strip()
132
139
 
133
140
  exon_start_exists, exon_end_exists = False, False
134
141
  if exon_start is not None:
135
142
  if exon_start < 1:
136
- return self._return_warnings(resp, "`exon_start` cannot be less than 1")
143
+ warnings.append("`exon_start` cannot be less than 1")
137
144
  exon_start_exists = True
138
145
 
139
146
  if exon_end is not None:
140
147
  if exon_end < 1:
141
- return self._return_warnings(resp, "`exon_end` cannot be less than 1")
148
+ warnings.append("`exon_end` cannot be less than 1")
142
149
  exon_end_exists = True
143
150
 
144
151
  if not exon_start_exists and not exon_end_exists:
145
- return self._return_warnings(
146
- resp, "Must provide either `exon_start` or `exon_end`"
147
- )
152
+ warnings.append("Must provide either `exon_start` or `exon_end`")
148
153
  if exon_start_exists and exon_end_exists and (exon_start > exon_end):
149
- return self._return_warnings(
150
- resp,
151
- f"Start exon {exon_start} is greater than end exon {exon_end}",
154
+ warnings.append(
155
+ f"Start exon {exon_start} is greater than end exon {exon_end}"
152
156
  )
153
157
 
158
+ if warnings:
159
+ return self._return_warnings(resp, warnings)
160
+
154
161
  # Get all exons and associated start/end coordinates for transcript
155
162
  tx_exons, warning = await self.uta_db.get_tx_exons(transcript)
156
163
  if not tx_exons:
157
- return self._return_warnings(resp, warning or "")
164
+ return self._return_warnings(resp, [warning] if warning else [])
158
165
 
159
166
  # Get exon start and exon end coordinates
160
167
  tx_exon_coords, warning = self.get_tx_exon_coords(
161
168
  transcript, tx_exons, exon_start, exon_end
162
169
  )
163
170
  if not tx_exon_coords:
164
- return self._return_warnings(resp, warning or "")
171
+ return self._return_warnings(resp, [warning] if warning else [])
165
172
  tx_exon_start_coords, tx_exon_end_coords = tx_exon_coords
166
173
 
167
174
  if gene:
@@ -173,7 +180,7 @@ class ExonGenomicCoordsMapper:
173
180
  transcript, tx_exon_start_coords, tx_exon_end_coords, gene=gene
174
181
  )
175
182
  if not alt_ac_start_end:
176
- return self._return_warnings(resp, warning or "")
183
+ return self._return_warnings(resp, [warning] if warning else [])
177
184
  alt_ac_start_data, alt_ac_end_data = alt_ac_start_end
178
185
 
179
186
  # Get gene and chromosome data, check that at least one was retrieved
@@ -182,8 +189,9 @@ class ExonGenomicCoordsMapper:
182
189
  if gene is None or chromosome is None:
183
190
  return self._return_warnings(
184
191
  resp,
185
- "Unable to retrieve `gene` or `chromosome` from genomic start and "
186
- "genomic end data",
192
+ [
193
+ "Unable to retrieve `gene` or `chromosome` from genomic start and genomic end data"
194
+ ],
187
195
  )
188
196
 
189
197
  g_start = alt_ac_start_data[3] - 1 if alt_ac_start_data else None
@@ -259,9 +267,8 @@ class ExonGenomicCoordsMapper:
259
267
  >>> result.genomic_data.exon_start, result.genomic_data.exon_end
260
268
  (1, 8)
261
269
 
262
- :param chromosome: Chromosome. Must give chromosome without a prefix
263
- (i.e. ``1`` or ``X``). If not provided, must provide ``alt_ac``.
264
- If ``alt_ac`` is also provided, ``alt_ac`` will be used.
270
+ :param chromosome: e.g. ``"1"`` or ``"chr1"``. If not provided, must provide
271
+ ``alt_ac``. If ``alt_ac`` is also provided, ``alt_ac`` will be used.
265
272
  :param alt_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
266
273
  must provide ``chromosome``. If ``chromosome`` is also provided, ``alt_ac``
267
274
  will be used.
@@ -279,14 +286,23 @@ class ExonGenomicCoordsMapper:
279
286
  breakpoint for the 3' end. For the negative strand, adjacent is defined as
280
287
  the exon following the breakpoint for the 5' end and the exon preceding the
281
288
  breakpoint for the 3' end.
289
+ :param gene: gene name. Ideally, HGNC symbol. Must be given if no ``transcript``
290
+ value is provided.
282
291
  :param residue_mode: Residue mode for ``start`` and ``end``
283
292
  :return: Genomic data (inter-residue coordinates)
284
293
  """
285
294
  resp = GenomicDataResponse(
286
295
  genomic_data=None, warnings=[], service_meta=service_meta()
287
296
  )
297
+ warnings = []
288
298
  if start is None and end is None:
289
- return self._return_warnings(resp, "Must provide either `start` or `end`")
299
+ warnings.append("Must provide either `start` or `end`")
300
+ if chromosome is None and alt_ac is None:
301
+ warnings.append("Must provide either `chromosome` or `alt_ac`")
302
+ if transcript is None and gene is None:
303
+ warnings.append("Must provide either `gene` or `transcript`")
304
+ if warnings:
305
+ return self._return_warnings(resp, warnings)
290
306
 
291
307
  params = {key: None for key in GenomicData.model_fields}
292
308
  if gene is not None:
@@ -310,7 +326,7 @@ class ExonGenomicCoordsMapper:
310
326
  if start_data.transcript_exon_data:
311
327
  start_data = start_data.transcript_exon_data.model_dump()
312
328
  else:
313
- return self._return_warnings(resp, start_data.warnings[0])
329
+ return self._return_warnings(resp, [start_data.warnings[0]])
314
330
  else:
315
331
  start_data = None
316
332
 
@@ -330,7 +346,7 @@ class ExonGenomicCoordsMapper:
330
346
  if end_data.transcript_exon_data:
331
347
  end_data = end_data.transcript_exon_data.model_dump()
332
348
  else:
333
- return self._return_warnings(resp, end_data.warnings[0])
349
+ return self._return_warnings(resp, [end_data.warnings[0]])
334
350
  else:
335
351
  end_data = None
336
352
 
@@ -341,7 +357,7 @@ class ExonGenomicCoordsMapper:
341
357
  f"Start `{field}`, {start_data[field]}, does "
342
358
  f"not match End `{field}`, {end_data[field]}"
343
359
  )
344
- return self._return_warnings(resp, msg)
360
+ return self._return_warnings(resp, [msg])
345
361
  params[field] = start_data[field]
346
362
  else:
347
363
  params[field] = end_data[field]
@@ -351,7 +367,7 @@ class ExonGenomicCoordsMapper:
351
367
  f"Input gene, {gene}, does not match expected output"
352
368
  f"gene, {params['gene']}"
353
369
  )
354
- return self._return_warnings(resp, msg)
370
+ return self._return_warnings(resp, [msg])
355
371
 
356
372
  for label, data in [("start", start_data), ("end", end_data)]:
357
373
  if data:
@@ -436,7 +452,7 @@ class ExonGenomicCoordsMapper:
436
452
  """
437
453
  if tx_exon_start is None and tx_exon_end is None:
438
454
  msg = "Must provide either `tx_exon_start` or `tx_exon_end` or both"
439
- logger.warning(msg)
455
+ _logger.warning(msg)
440
456
  return None, msg
441
457
 
442
458
  alt_ac_data = {"start": None, "end": None}
@@ -462,7 +478,7 @@ class ExonGenomicCoordsMapper:
462
478
  error = "Genomic accession does not match"
463
479
  else:
464
480
  error = "Strand does not match"
465
- logger.warning(
481
+ _logger.warning(
466
482
  "%s: %s != %s",
467
483
  error,
468
484
  alt_ac_data["start"][i],
@@ -510,25 +526,22 @@ class ExonGenomicCoordsMapper:
510
526
  resp = TranscriptExonDataResponse(
511
527
  transcript_exon_data=None, warnings=[], service_meta=service_meta()
512
528
  )
513
-
514
- if transcript is None and gene is None:
515
- return self._return_warnings(
516
- resp, "Must provide either `gene` or `transcript`"
517
- )
518
-
519
529
  params = {key: None for key in TranscriptExonData.model_fields}
520
530
 
521
531
  if get_nearest_transcript_junction:
522
532
  if not gene or not strand:
523
533
  return self._return_warnings(
524
534
  resp,
525
- "Gene or strand must be provided to select the adjacent transcript junction",
535
+ [
536
+ "Gene or strand must be provided to select the adjacent transcript junction"
537
+ ],
526
538
  )
527
- alt_acs, w = self.seqrepo_access.chromosome_to_acs(chromosome)
539
+ if not alt_ac:
540
+ alt_acs, w = self.seqrepo_access.chromosome_to_acs(chromosome)
528
541
 
529
- if not alt_acs:
530
- return self._return_warnings(resp, w)
531
- alt_ac = alt_acs[0]
542
+ if not alt_acs:
543
+ return self._return_warnings(resp, [w])
544
+ alt_ac = alt_acs[0]
532
545
 
533
546
  if not transcript:
534
547
  # Select a transcript if not provided
@@ -562,14 +575,14 @@ class ExonGenomicCoordsMapper:
562
575
  else:
563
576
  return self._return_warnings(
564
577
  resp,
565
- f"Could not find a transcript for {gene} on {alt_ac}",
578
+ [f"Could not find a transcript for {gene} on {alt_ac}"],
566
579
  )
567
580
 
568
581
  tx_genomic_coords, w = await self.uta_db.get_tx_exons_genomic_coords(
569
582
  tx_ac=transcript, alt_ac=alt_ac
570
583
  )
571
584
  if not tx_genomic_coords:
572
- return self._return_warnings(resp, w)
585
+ return self._return_warnings(resp, [w])
573
586
 
574
587
  # Check if breakpoint occurs on an exon.
575
588
  # If not, determine the adjacent exon given the selected transcript
@@ -603,7 +616,7 @@ class ExonGenomicCoordsMapper:
603
616
  # Check if valid accession is given
604
617
  if not await self.uta_db.validate_genomic_ac(alt_ac):
605
618
  return self._return_warnings(
606
- resp, f"Invalid genomic accession: {alt_ac}"
619
+ resp, [f"Invalid genomic accession: {alt_ac}"]
607
620
  )
608
621
 
609
622
  genes_alt_acs, warning = await self.uta_db.get_genes_and_alt_acs(
@@ -626,11 +639,11 @@ class ExonGenomicCoordsMapper:
626
639
  genes_alt_acs = None
627
640
 
628
641
  if not genes_alt_acs:
629
- return self._return_warnings(resp, warning)
642
+ return self._return_warnings(resp, [warning])
630
643
 
631
644
  gene_alt_ac, warning = self._get_gene_and_alt_ac(genes_alt_acs, gene)
632
645
  if not gene_alt_ac:
633
- return self._return_warnings(resp, warning)
646
+ return self._return_warnings(resp, [warning])
634
647
  gene, alt_ac = gene_alt_ac
635
648
 
636
649
  if transcript is None:
@@ -638,7 +651,7 @@ class ExonGenomicCoordsMapper:
638
651
  params, gene, alt_ac, pos, strand, is_start
639
652
  )
640
653
  if warnings:
641
- return self._return_warnings(resp, warnings)
654
+ return self._return_warnings(resp, [warnings])
642
655
  else:
643
656
  params["transcript"] = transcript
644
657
  params["gene"] = gene
@@ -646,7 +659,7 @@ class ExonGenomicCoordsMapper:
646
659
  params["chr"] = alt_ac
647
660
  warning = await self._set_genomic_data(params, strand, is_start)
648
661
  if warning:
649
- return self._return_warnings(resp, warning)
662
+ return self._return_warnings(resp, [warning])
650
663
 
651
664
  resp.transcript_exon_data = TranscriptExonData(**params)
652
665
  return resp
@@ -726,7 +739,7 @@ class ExonGenomicCoordsMapper:
726
739
  msg = f"Unable to find mane data for {alt_ac} with position {pos}"
727
740
  if gene:
728
741
  msg += f" on gene {gene}"
729
- logger.warning(msg)
742
+ _logger.warning(msg)
730
743
  return msg
731
744
 
732
745
  params["gene"] = mane_data.gene
@@ -750,7 +763,7 @@ class ExonGenomicCoordsMapper:
750
763
  f"{params['transcript']} with position {tx_pos} "
751
764
  f"does not exist on exons: {tx_exons}"
752
765
  )
753
- logger.warning(msg)
766
+ _logger.warning(msg)
754
767
  return msg
755
768
 
756
769
  strand_to_use = strand if strand is not None else mane_data.strand
@@ -805,7 +818,7 @@ class ExonGenomicCoordsMapper:
805
818
  return f"Unable to get chromosome and assembly for " f"{params['chr']}"
806
819
 
807
820
  chromosome_number, assembly = descr
808
- liftover_data = self.uta_db.get_liftover(
821
+ liftover_data = self.liftover.get_liftover(
809
822
  chromosome_number, params["pos"], Assembly.GRCH38
810
823
  )
811
824
  if liftover_data is None:
@@ -0,0 +1,90 @@
1
+ """Module for mapping to/from human genome assemblies.
2
+
3
+ Currently only supports GRCh37 <-> GRCh38
4
+ """
5
+
6
+ import logging
7
+ from os import environ
8
+
9
+ from agct import Converter, Genome
10
+
11
+ from cool_seq_tool.schemas import Assembly
12
+ from cool_seq_tool.utils import process_chromosome_input
13
+
14
+ # Environment variables for paths to chain files for agct
15
+ LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
16
+ LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
17
+
18
+
19
+ _logger = logging.getLogger(__name__)
20
+
21
+
22
+ class LiftOver:
23
+ """Class for mapping to/from human genome assemblies
24
+
25
+ Currently only supports GRCh37 <-> GRCh38
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ chain_file_37_to_38: str | None = None,
31
+ chain_file_38_to_37: str | None = None,
32
+ ) -> None:
33
+ """Initialize liftover class
34
+
35
+ :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
36
+ This is used for ``agct``. If this is not provided, will check to see
37
+ if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
38
+ allow ``agct`` to download a chain file from UCSC
39
+ :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
40
+ This is used for ``agct``. If this is not provided, will check to see
41
+ if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
42
+ allow ``agct`` to download a chain file from UCSC
43
+ """
44
+ self.from_37_to_38 = Converter(
45
+ chainfile=chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38,
46
+ from_db=Genome.HG19,
47
+ to_db=Genome.HG38,
48
+ )
49
+ self.from_38_to_37 = Converter(
50
+ chainfile=chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37,
51
+ from_db=Genome.HG38,
52
+ to_db=Genome.HG19,
53
+ )
54
+
55
+ def get_liftover(
56
+ self, chromosome: str, pos: int, liftover_to_assembly: Assembly
57
+ ) -> tuple[str, int] | None:
58
+ """Get new genome assembly data for a position on a chromosome.
59
+
60
+ Use a UCSC-style chromosome name:
61
+
62
+ >>> from cool_seq_tool.mappers import LiftOver
63
+ >>> from cool_seq_tool.schemas import Assembly
64
+ >>> lo = LiftOver()
65
+ >>> lo.get_liftover("chr7", 140453136, Assembly.GRCH38)
66
+ ('chr7', 140753336)
67
+
68
+ Chromosome names can also be NCBI-style, without prefixes:
69
+
70
+ >>> lo.get_liftover("7", 140453136, Assembly.GRCH38)
71
+ ('chr7', 140753336)
72
+
73
+ :param chromosome: The chromosome number, e.g. ``"chr7"``, ``"chrX"``, ``"5"``.
74
+ :param pos: Position on the chromosome
75
+ :param liftover_to_assembly: Assembly to liftover to
76
+ :return: Target chromosome and target position for assembly
77
+ """
78
+ chromosome = process_chromosome_input(chromosome, "LiftOver.get_liftover()")
79
+ if liftover_to_assembly == Assembly.GRCH38:
80
+ liftover = self.from_37_to_38.convert_coordinate(chromosome, pos)
81
+ elif liftover_to_assembly == Assembly.GRCH37:
82
+ liftover = self.from_38_to_37.convert_coordinate(chromosome, pos)
83
+ else:
84
+ _logger.warning("%s assembly not supported", liftover_to_assembly)
85
+ liftover = None
86
+
87
+ if not liftover:
88
+ _logger.warning("%s does not exist on %s", pos, chromosome)
89
+ return None
90
+ return liftover[0][:2]