cool-seq-tool 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -12,8 +12,16 @@ import boto3
12
12
  import polars as pl
13
13
  from asyncpg.exceptions import InterfaceError, InvalidAuthorizationSpecificationError
14
14
  from botocore.exceptions import ClientError
15
-
16
- from cool_seq_tool.schemas import AnnotationLayer, Assembly, Strand
15
+ from pydantic import Field, StrictInt, StrictStr
16
+
17
+ from cool_seq_tool.schemas import (
18
+ AnnotationLayer,
19
+ Assembly,
20
+ BaseModelForbidExtra,
21
+ GenomicTxData,
22
+ GenomicTxMetadata,
23
+ Strand,
24
+ )
17
25
 
18
26
  # use `bound` to upper-bound UtaDatabase or child classes
19
27
  UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")
@@ -25,6 +33,52 @@ UTA_DB_URL = environ.get(
25
33
  _logger = logging.getLogger(__name__)
26
34
 
27
35
 
36
+ class DbConnectionArgs(BaseModelForbidExtra):
37
+ """Represent database connection arguments"""
38
+
39
+ host: str
40
+ port: int
41
+ user: str
42
+ password: str
43
+ database: str
44
+
45
+
46
+ class GenomicAlnData(BaseModelForbidExtra):
47
+ """Represent genomic alignment data from UTA tx_exon_aln_v view"""
48
+
49
+ hgnc: StrictStr = Field(..., description="HGNC gene symbol.")
50
+ ord: StrictInt = Field(..., description="Exon number. 0-based.")
51
+ alt_ac: StrictStr = Field(..., description="RefSeq genomic accession.")
52
+ alt_start_i: StrictInt = Field(
53
+ ...,
54
+ description="`alt_ac`'s start index of the exon using inter-residue coordinates.",
55
+ )
56
+ alt_end_i: StrictInt = Field(
57
+ ...,
58
+ description="`alt_ac`'s end index of the exon using inter-residue coordinates.",
59
+ )
60
+ alt_strand: Strand = Field(..., description="Strand.")
61
+
62
+
63
+ class TxExonAlnData(GenomicAlnData):
64
+ """Represent data from UTA tx_exon_aln_v view"""
65
+
66
+ tx_ac: StrictStr = Field(..., description="Transcript accession.")
67
+ tx_start_i: StrictInt = Field(
68
+ ...,
69
+ description="`tx_ac`'s start index of the exon using inter-residue coordinates.",
70
+ )
71
+ tx_end_i: StrictInt = Field(
72
+ ...,
73
+ description="`tx_ac`'s end index of the exon using inter-residue coordinates.",
74
+ )
75
+ alt_aln_method: StrictStr = Field(
76
+ ..., description="The alignment method used to compare sequences."
77
+ )
78
+ tx_exon_id: StrictInt = Field(..., description="`tx_ac` exon identifier.")
79
+ alt_exon_id: StrictInt = Field(..., description="`alt_ac` exon identifier.")
80
+
81
+
28
82
  class UtaDatabase:
29
83
  """Provide transcript lookup and metadata tools via the Universal Transcript Archive
30
84
  (UTA) database.
@@ -51,11 +105,11 @@ class UtaDatabase:
51
105
  self.db_url = db_url.replace(original_pwd, quote(original_pwd))
52
106
  self.args = self._get_conn_args()
53
107
 
54
- def _get_conn_args(self) -> dict:
108
+ def _get_conn_args(self) -> DbConnectionArgs:
55
109
  """Return connection arguments.
56
110
 
57
111
  :param db_url: raw connection URL
58
- :return: Database credentials
112
+ :return: Database connection arguments
59
113
  """
60
114
  if "UTA_DB_PROD" in environ:
61
115
  secret = ast.literal_eval(self.get_secret())
@@ -72,23 +126,24 @@ class UtaDatabase:
72
126
  environ["UTA_DB_URL"] = (
73
127
  f"postgresql://{username}@{host}:{port}/{database}/{schema}"
74
128
  )
75
- return {
76
- "host": host,
77
- "port": int(port),
78
- "database": database,
79
- "user": username,
80
- "password": password,
81
- }
129
+ return DbConnectionArgs(
130
+ host=host,
131
+ port=int(port),
132
+ database=database,
133
+ user=username,
134
+ password=password,
135
+ )
136
+
82
137
  url = ParseResult(urlparse(self.db_url))
83
138
  self.schema = url.schema
84
139
  password = unquote(url.password) if url.password else ""
85
- return {
86
- "host": url.hostname,
87
- "port": url.port,
88
- "database": url.database,
89
- "user": url.username,
90
- "password": password,
91
- }
140
+ return DbConnectionArgs(
141
+ host=url.hostname,
142
+ port=url.port,
143
+ database=url.database,
144
+ user=url.username,
145
+ password=password,
146
+ )
92
147
 
93
148
  async def create_pool(self) -> None:
94
149
  """Create connection pool if not already created."""
@@ -100,11 +155,11 @@ class UtaDatabase:
100
155
  max_size=10,
101
156
  max_inactive_connection_lifetime=3,
102
157
  command_timeout=60,
103
- host=self.args["host"],
104
- port=self.args["port"],
105
- user=self.args["user"],
106
- password=self.args["password"],
107
- database=self.args["database"],
158
+ host=self.args.host,
159
+ port=self.args.port,
160
+ user=self.args.user,
161
+ password=self.args.password,
162
+ database=self.args.database,
108
163
  )
109
164
  except InterfaceError as e:
110
165
  _logger.error(
@@ -215,156 +270,21 @@ class UtaDatabase:
215
270
  """
216
271
  return [list(i) for i in li]
217
272
 
218
- async def get_genes_and_alt_acs(
219
- self,
220
- pos: int,
221
- strand: Strand | None = None,
222
- chromosome: int | None = None,
223
- alt_ac: str | None = None,
224
- gene: str | None = None,
225
- ) -> tuple[dict | None, str | None]:
226
- """Return genes and genomic accessions for a position on a chromosome or alt_ac
227
-
228
- :param pos: Genomic position
229
- :param strand: Strand
230
- :param chromosome: Chromosome. Must give chromosome without a prefix
231
- (i.e. ``1`` or ``X``). If not provided, must provide ``alt_ac``.
232
- If ``alt_ac`` is also provided, ``alt_ac`` will be used.
233
- :param alt_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
234
- must provide ``chromosome``. If ``chromosome`` is also provided, ``alt_ac``
235
- will be used.
236
- :param gene: Gene symbol
237
- :return: Dictionary containing genes and genomic accessions and warnings if found
238
- """
239
- alt_ac_cond = (
240
- f"WHERE alt_ac = '{alt_ac}'"
241
- if alt_ac
242
- else f"WHERE alt_ac ~ '^NC_[0-9]+0{chromosome}.[0-9]+$'"
243
- )
244
- strand_cond = f"AND alt_strand = '{strand.value}'" if strand else ""
245
- gene_cond = f"AND hgnc = '{gene}'" if gene else ""
246
-
247
- query = f"""
248
- SELECT hgnc, alt_ac
249
- FROM {self.schema}.tx_exon_aln_v
250
- {alt_ac_cond}
251
- AND alt_aln_method = 'splign'
252
- AND {pos} BETWEEN alt_start_i AND alt_end_i
253
- {strand_cond}
254
- {gene_cond};
255
- """ # noqa: S608
256
-
257
- results = await self.execute_query(query)
258
- if not results:
259
- msg = (
260
- f"Unable to find a result for chromosome "
261
- f"{alt_ac or chromosome} where genomic coordinate {pos}"
262
- f" is mapped between an exon's start and end coordinates"
263
- )
264
- if strand:
265
- msg += (
266
- f" on the "
267
- f"{'positive' if strand == Strand.POSITIVE else 'negative'} strand"
268
- )
269
- if gene:
270
- msg += f" and on gene {gene}"
271
- return None, msg
272
-
273
- results = self._transform_list(results)
274
- genes = set()
275
- alt_acs = set()
276
- for r in results:
277
- genes.add(r[0])
278
- alt_acs.add(r[1])
279
- return {"genes": genes, "alt_acs": alt_acs}, None
280
-
281
- async def get_tx_exons(
282
- self, tx_ac: str, alt_ac: str | None = None
283
- ) -> tuple[list[tuple[int, int]] | None, str | None]:
284
- """Get list of transcript exons start/end coordinates.
285
-
286
- :param tx_ac: Transcript accession
287
- :param alt_ac: Genomic accession
288
- :return: List of a transcript's accessions and warnings if found
289
- """
290
- if alt_ac:
291
- # We know what assembly we're looking for since we have the
292
- # genomic accession
293
- query = f"""
294
- SELECT DISTINCT tx_start_i, tx_end_i
295
- FROM {self.schema}.tx_exon_aln_v
296
- WHERE tx_ac = '{tx_ac}'
297
- AND alt_aln_method = 'splign'
298
- AND alt_ac = '{alt_ac}'
299
- """ # noqa: S608
300
- else:
301
- # Use GRCh38 by default if no genomic accession is provided
302
- query = f"""
303
- SELECT DISTINCT tx_start_i, tx_end_i
304
- FROM {self.schema}.tx_exon_aln_v as t
305
- INNER JOIN {self.schema}._seq_anno_most_recent as s
306
- ON t.alt_ac = s.ac
307
- WHERE s.descr = ''
308
- AND t.tx_ac = '{tx_ac}'
309
- AND t.alt_aln_method = 'splign'
310
- AND t.alt_ac like 'NC_000%'
311
- """ # noqa: S608
312
- result = await self.execute_query(query)
313
-
314
- if not result:
315
- msg = f"Unable to get exons for {tx_ac}"
316
- _logger.warning(msg)
317
- return None, msg
318
- tx_exons = [(r["tx_start_i"], r["tx_end_i"]) for r in result]
319
- return tx_exons, None
320
-
321
- async def get_tx_exons_genomic_coords(
322
- self,
323
- tx_ac: str,
324
- alt_ac: str,
325
- ) -> tuple[tuple[int, int, int, int, int] | None, str | None]:
326
- """Get exon number, transcript coordinates, and genomic coordinates
327
-
328
- :param tx_ac: Transcript accession
329
- :param alt_ac: RefSeq genomic accession
330
- :return: Tuple of exon numbers, transcript and genomic coordinates,
331
- and warnings if found
332
- """
333
- query = f"""
334
- SELECT DISTINCT ord, tx_start_i, tx_end_i, alt_start_i, alt_end_i
335
- FROM {self.schema}.tx_exon_aln_v
336
- WHERE tx_ac = '{tx_ac}'
337
- AND alt_ac = '{alt_ac}'
338
- """ # noqa: S608
339
- result = await self.execute_query(query)
340
-
341
- if not result:
342
- msg = f"Unable to get exons and genomic coordinates for {tx_ac} on {alt_ac}"
343
- _logger.warning(msg)
344
- return None, msg
345
- tx_exons_genomic_coords = [
346
- (r["ord"], r["tx_start_i"], r["tx_end_i"], r["alt_start_i"], r["alt_end_i"])
347
- for r in result
348
- ]
349
- return tx_exons_genomic_coords, None
350
-
351
273
  async def get_alt_ac_start_or_end(
352
274
  self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: str | None
353
- ) -> tuple[tuple[str, str, int, int, int] | None, str | None]:
275
+ ) -> tuple[GenomicAlnData | None, str | None]:
354
276
  """Get genomic data for related transcript exon start or end.
355
277
 
356
278
  :param tx_ac: Transcript accession
357
279
  :param tx_exon_start: Transcript's exon start coordinate
358
280
  :param tx_exon_end: Transcript's exon end coordinate
359
281
  :param gene: HGNC gene symbol
360
- :return: [hgnc symbol, genomic accession for chromosome,
361
- aligned genomic start coordinate, aligned genomic end coordinate, strand],
362
- and warnings if found
282
+ :return: Genomic alignment data and warnings if found
363
283
  """
364
284
  gene_query = f"AND T.hgnc = '{gene}'" if gene else ""
365
285
 
366
286
  query = f"""
367
- SELECT T.hgnc, T.alt_ac, T.alt_start_i, T.alt_end_i, T.alt_strand
287
+ SELECT T.hgnc, T.alt_ac, T.alt_start_i, T.alt_end_i, T.alt_strand, T.ord
368
288
  FROM {self.schema}._cds_exons_fp_v as C
369
289
  JOIN {self.schema}.tx_exon_aln_v as T ON T.tx_ac = C.tx_ac
370
290
  WHERE T.tx_ac = '{tx_ac}'
@@ -387,8 +307,7 @@ class UtaDatabase:
387
307
  msg += f" on gene {gene}"
388
308
  _logger.warning(msg)
389
309
  return None, msg
390
- result = result[0]
391
- return (result[0], result[1], result[2], result[3], result[4]), None
310
+ return GenomicAlnData(**result[0]), None
392
311
 
393
312
  async def get_cds_start_end(self, tx_ac: str) -> tuple[int, int] | None:
394
313
  """Get coding start and end site
@@ -497,7 +416,7 @@ class UtaDatabase:
497
416
  alt_ac: str | None = None,
498
417
  use_tx_pos: bool = True,
499
418
  like_tx_ac: bool = False,
500
- ) -> list:
419
+ ) -> list[TxExonAlnData]:
501
420
  """Return queried data from tx_exon_aln_v table.
502
421
 
503
422
  :param tx_ac: accession on c. coordinate
@@ -511,11 +430,8 @@ class UtaDatabase:
511
430
  :param like_tx_ac: ``True`` if tx_ac condition should be a like statement.
512
431
  This is used when you want to query an accession regardless of its version
513
432
  ``False`` if tx_condition will be exact match
514
- :return: List of tx_exon_aln_v data
433
+ :return: List of transcript exon alignment data
515
434
  """
516
- if end_pos is None:
517
- end_pos = start_pos
518
-
519
435
  if tx_ac.startswith("EN"):
520
436
  temp_ac = tx_ac.split(".")[0]
521
437
  aln_method = f"AND alt_aln_method='genebuild'" # noqa: F541
@@ -543,7 +459,7 @@ class UtaDatabase:
543
459
 
544
460
  query = f"""
545
461
  SELECT hgnc, tx_ac, tx_start_i, tx_end_i, alt_ac, alt_start_i,
546
- alt_end_i, alt_strand, alt_aln_method, tx_exon_id, alt_exon_id
462
+ alt_end_i, alt_strand, alt_aln_method, ord, tx_exon_id, alt_exon_id
547
463
  FROM {self.schema}.tx_exon_aln_v
548
464
  {tx_q}
549
465
  {alt_ac_q}
@@ -562,22 +478,17 @@ class UtaDatabase:
562
478
  temp_ac,
563
479
  alt_ac,
564
480
  )
565
- return [list(r) for r in result]
481
+ return [TxExonAlnData(**r) for r in result]
566
482
 
567
483
  @staticmethod
568
- def data_from_result(result: list) -> dict | None:
484
+ def data_from_result(result: TxExonAlnData) -> GenomicTxData | None:
569
485
  """Return data found from result.
570
486
 
571
- :param result: Data from tx_exon_aln_v table
572
- :return: Gene, strand, and position ranges for tx and alt_ac
487
+ :param result: Transcript exon alignment data
488
+ :return: Aligned genomic / transcript exon data
573
489
  """
574
- gene = result[0]
575
- tx_pos_range = result[2], result[3]
576
- alt_pos_range = result[5], result[6]
577
- strand = Strand(result[7])
578
- alt_aln_method = result[8]
579
- tx_exon_id = result[9]
580
- alt_exon_id = result[10]
490
+ tx_pos_range = result.tx_start_i, result.tx_end_i
491
+ alt_pos_range = result.alt_start_i, result.alt_end_i
581
492
 
582
493
  if (tx_pos_range[1] - tx_pos_range[0]) != (alt_pos_range[1] - alt_pos_range[0]):
583
494
  _logger.warning(
@@ -587,19 +498,19 @@ class UtaDatabase:
587
498
  )
588
499
  return None
589
500
 
590
- return {
591
- "gene": gene,
592
- "strand": strand,
593
- "tx_pos_range": tx_pos_range,
594
- "alt_pos_range": alt_pos_range,
595
- "alt_aln_method": alt_aln_method,
596
- "tx_exon_id": tx_exon_id,
597
- "alt_exon_id": alt_exon_id,
598
- }
501
+ return GenomicTxData(
502
+ gene=result.hgnc,
503
+ strand=Strand(result.alt_strand),
504
+ tx_pos_range=tx_pos_range,
505
+ alt_pos_range=alt_pos_range,
506
+ alt_aln_method=result.alt_aln_method,
507
+ tx_exon_id=result.tx_exon_id,
508
+ alt_exon_id=result.alt_exon_id,
509
+ )
599
510
 
600
511
  async def get_mane_c_genomic_data(
601
512
  self, ac: str, alt_ac: str | None, start_pos: int, end_pos: int
602
- ) -> dict | None:
513
+ ) -> GenomicTxMetadata | None:
603
514
  """Get MANE transcript and genomic data. Used when going from g. to MANE c.
604
515
  representation.
605
516
 
@@ -623,7 +534,8 @@ class UtaDatabase:
623
534
  be set to ``None`` if unavailable.
624
535
  :param start_pos: Genomic start position
625
536
  :param end_pos: Genomic end position change
626
- :return: MANE transcript results if successful
537
+ :return: Metadata for MANE genomic and transcript accessions results if
538
+ successful
627
539
  """
628
540
  results = await self.get_tx_exon_aln_v_data(
629
541
  ac, start_pos, end_pos, alt_ac=alt_ac, use_tx_pos=False
@@ -632,8 +544,8 @@ class UtaDatabase:
632
544
  return None
633
545
  result = results[0]
634
546
 
635
- data = self.data_from_result(result)
636
- if not data:
547
+ genomic_tx_data = self.data_from_result(result)
548
+ if not genomic_tx_data:
637
549
  return None
638
550
 
639
551
  coding_start_site = await self.get_cds_start_end(ac)
@@ -641,25 +553,30 @@ class UtaDatabase:
641
553
  _logger.warning("Accession %s not found in UTA", ac)
642
554
  return None
643
555
 
644
- data["tx_ac"] = result[1]
645
- data["alt_ac"] = result[4]
646
- data["coding_start_site"] = coding_start_site[0]
647
- data["coding_end_site"] = coding_start_site[1]
556
+ coding_start_site, coding_end_site = coding_start_site
648
557
 
649
- if data["strand"] == Strand.NEGATIVE:
650
- data["alt_pos_change_range"] = (end_pos, start_pos)
651
- data["alt_pos_change"] = (
652
- data["alt_pos_range"][1] - data["alt_pos_change_range"][0],
653
- data["alt_pos_change_range"][1] - data["alt_pos_range"][0],
558
+ if genomic_tx_data.strand == Strand.NEGATIVE:
559
+ alt_pos_change_range = (end_pos, start_pos)
560
+ pos_change = (
561
+ genomic_tx_data.alt_pos_range[1] - alt_pos_change_range[0],
562
+ alt_pos_change_range[1] - genomic_tx_data.alt_pos_range[0],
654
563
  )
655
564
  else:
656
- data["alt_pos_change_range"] = (start_pos, end_pos)
657
- data["alt_pos_change"] = (
658
- data["alt_pos_change_range"][0] - data["alt_pos_range"][0],
659
- data["alt_pos_range"][1] - data["alt_pos_change_range"][1],
565
+ alt_pos_change_range = (start_pos, end_pos)
566
+ pos_change = (
567
+ alt_pos_change_range[0] - genomic_tx_data.alt_pos_range[0],
568
+ genomic_tx_data.alt_pos_range[1] - alt_pos_change_range[1],
660
569
  )
661
570
 
662
- return data
571
+ return GenomicTxMetadata(
572
+ **genomic_tx_data.model_dump(),
573
+ pos_change=pos_change,
574
+ tx_ac=result.tx_ac,
575
+ alt_ac=result.alt_ac,
576
+ coding_start_site=coding_start_site,
577
+ coding_end_site=coding_end_site,
578
+ alt_pos_change_range=alt_pos_change_range,
579
+ )
663
580
 
664
581
  async def get_genomic_tx_data(
665
582
  self,
@@ -669,7 +586,7 @@ class UtaDatabase:
669
586
  | Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
670
587
  alt_ac: str | None = None,
671
588
  target_genome_assembly: Assembly = Assembly.GRCH38,
672
- ) -> dict | None:
589
+ ) -> GenomicTxMetadata | None:
673
590
  """Get transcript mapping to genomic data.
674
591
 
675
592
  :param tx_ac: Accession on c. coordinate
@@ -678,8 +595,7 @@ class UtaDatabase:
678
595
  :param alt_ac: Accession on g. coordinate
679
596
  :param target_genome_assembly: Genome assembly to get genomic data for.
680
597
  If ``alt_ac`` is provided, it will return the associated assembly.
681
- :return: Gene, Transcript accession and position change,
682
- Altered transcript accession and position change, Strand
598
+ :return: Metadata for genomic and transcript accessions
683
599
  """
684
600
  results = await self.get_tx_exon_aln_v_data(
685
601
  tx_ac,
@@ -696,35 +612,39 @@ class UtaDatabase:
696
612
  else:
697
613
  result = results[0]
698
614
 
699
- data = self.data_from_result(result)
700
- if not data:
615
+ genomic_tx_data = self.data_from_result(result)
616
+ if not genomic_tx_data:
701
617
  return None
702
- data["tx_ac"] = result[1]
703
- data["alt_ac"] = result[4]
704
618
 
705
- data["pos_change"] = (
706
- pos[0] - data["tx_pos_range"][0],
707
- data["tx_pos_range"][1] - pos[1],
619
+ pos_change = (
620
+ pos[0] - genomic_tx_data.tx_pos_range[0],
621
+ genomic_tx_data.tx_pos_range[1] - pos[1],
708
622
  )
709
623
 
710
624
  if annotation_layer == AnnotationLayer.CDNA:
711
- if data["strand"] == Strand.NEGATIVE:
712
- data["alt_pos_change_range"] = (
713
- data["alt_pos_range"][1] - data["pos_change"][0],
714
- data["alt_pos_range"][0] + data["pos_change"][1],
625
+ if genomic_tx_data.strand == Strand.NEGATIVE:
626
+ alt_pos_change_range = (
627
+ genomic_tx_data.alt_pos_range[1] - pos_change[0],
628
+ genomic_tx_data.alt_pos_range[0] + pos_change[1],
715
629
  )
716
630
  else:
717
- data["alt_pos_change_range"] = (
718
- data["alt_pos_range"][0] + data["pos_change"][0],
719
- data["alt_pos_range"][1] - data["pos_change"][1],
631
+ alt_pos_change_range = (
632
+ genomic_tx_data.alt_pos_range[0] + pos_change[0],
633
+ genomic_tx_data.alt_pos_range[1] - pos_change[1],
720
634
  )
721
635
  else:
722
- if data["strand"] == Strand.NEGATIVE:
723
- data["alt_pos_change_range"] = (pos[1], pos[0])
636
+ if genomic_tx_data.strand == Strand.NEGATIVE:
637
+ alt_pos_change_range = (pos[1], pos[0])
724
638
  else:
725
- data["alt_pos_change_range"] = pos
726
-
727
- return data
639
+ alt_pos_change_range = pos
640
+
641
+ return GenomicTxMetadata(
642
+ **genomic_tx_data.model_dump(),
643
+ tx_ac=result.tx_ac,
644
+ alt_ac=result.alt_ac,
645
+ pos_change=pos_change,
646
+ alt_pos_change_range=alt_pos_change_range,
647
+ )
728
648
 
729
649
  async def get_ac_from_gene(self, gene: str) -> list[str]:
730
650
  """Return genomic accession(s) associated to a gene.
cool_seq_tool/utils.py CHANGED
@@ -6,35 +6,35 @@ import logging
6
6
  from bioutils.accessions import chr22XY
7
7
 
8
8
  from cool_seq_tool import __version__
9
- from cool_seq_tool.schemas import ResidueMode, ServiceMeta
9
+ from cool_seq_tool.schemas import CoordinateType, ServiceMeta
10
10
 
11
11
  _logger = logging.getLogger(__name__)
12
12
 
13
13
 
14
14
  def get_inter_residue_pos(
15
- start_pos: int, end_pos: int, residue_mode: ResidueMode
15
+ start_pos: int, end_pos: int, coordinate_type: CoordinateType
16
16
  ) -> tuple[int, int]:
17
17
  """Return equivalent inter-residue position.
18
18
 
19
- Generally, we prefer to work with inter-residue coordinates where possible. Our
19
+ Residue coordinates start with 1, whereas inter-residue coordinates start with 0.
20
+
21
+ It is preferred to work with inter-residue coordinates where possible. Our
20
22
  rationale is detailed in an appendix to the
21
23
  `VRS docs <https://vrs.ga4gh.org/en/stable/appendices/design_decisions.html#inter-residue-coordinates>`_.
22
24
  This function is used internally to shift user-provided coordinates accordingly.
23
25
 
24
26
  >>> from cool_seq_tool.utils import get_inter_residue_pos
25
- >>> from cool_seq_tool.schemas import ResidueMode
26
- >>> get_inter_residue_pos(10, ResidueMode.RESIDUE)
27
+ >>> from cool_seq_tool.schemas import CoordinateType
28
+ >>> get_inter_residue_pos(10, 10, CoordinateType.RESIDUE)
27
29
  (9, 10)
28
30
 
29
31
  :param start_pos: Start position
30
32
  :param end_pos: End position
31
- :param residue_mode: Residue mode for `start_pos` and `end_pos`
33
+ :param coordinate_type: Coordinate type for `start_pos` and `end_pos`
32
34
  :return: Inter-residue coordinates
33
35
  """
34
- if residue_mode == ResidueMode.RESIDUE:
36
+ if coordinate_type == CoordinateType.RESIDUE:
35
37
  start_pos -= 1
36
- elif residue_mode == ResidueMode.ZERO:
37
- end_pos += 1
38
38
  return start_pos, end_pos
39
39
 
40
40
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cool_seq_tool
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -52,7 +52,7 @@ Requires-Dist: polars ~=1.0
52
52
  Requires-Dist: hgvs
53
53
  Requires-Dist: biocommons.seqrepo
54
54
  Requires-Dist: pydantic ==2.*
55
- Requires-Dist: ga4gh.vrs
55
+ Requires-Dist: ga4gh.vrs ~=2.0.0a10
56
56
  Requires-Dist: wags-tails ~=0.1.3
57
57
  Requires-Dist: bioutils
58
58
  Provides-Extra: dev
@@ -83,7 +83,7 @@ CoolSeqTool
83
83
 
84
84
  ---
85
85
 
86
- **[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
86
+ **[Documentation](https://coolseqtool.readthedocs.io/stable/)** · [Installation](https://coolseqtool.readthedocs.io/stable/install.html) · [Usage](https://coolseqtool.readthedocs.io/stable/usage.html) · [API reference](https://coolseqtool.readthedocs.io/stable/reference/index.html)
87
87
 
88
88
  ---
89
89
 
@@ -107,7 +107,7 @@ CoolSeqTool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
107
107
  python3 -m pip install cool-seq-tool
108
108
  ```
109
109
 
110
- See the [installation instructions](https://coolseqtool.readthedocs.io/latest/install.html) in the documentation for a description of dependency setup requirements.
110
+ See the [installation instructions](https://coolseqtool.readthedocs.io/stable/install.html) in the documentation for a description of dependency setup requirements.
111
111
 
112
112
  ---
113
113
 
@@ -116,14 +116,14 @@ See the [installation instructions](https://coolseqtool.readthedocs.io/latest/in
116
116
  All CoolSeqTool resources can be initialized by way of a top-level class instance:
117
117
 
118
118
  ```pycon
119
- >>> from cool_seq_tool.app import CoolSeqTool
120
- >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
119
+ >>> from cool_seq_tool import CoolSeqTool
120
+ >>> from cool_seq_tool.schemas import AnnotationLayer, CoordinateType
121
121
  >>> cst = CoolSeqTool()
122
122
  >>> result = await cst.mane_transcript.get_mane_transcript(
123
123
  ... "NP_004324.2",
124
124
  ... 599,
125
125
  ... AnnotationLayer.PROTEIN,
126
- ... residue_mode=ResidueMode.INTER_RESIDUE,
126
+ ... coordinate_type=CoordinateType.INTER_RESIDUE,
127
127
  ... )
128
128
  >>> result.gene, result.refseq, result.status
129
129
  ('EGFR', 'NM_005228.5', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
@@ -133,4 +133,4 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
133
133
 
134
134
  ## Feedback and contributing
135
135
 
136
- We welcome bug reports, feature requests, and code contributions from users and interested collaborators. The [documentation](https://coolseqtool.readthedocs.io/latest/contributing.html) contains guidance for submitting feedback and contributing new code.
136
+ We welcome bug reports, feature requests, and code contributions from users and interested collaborators. The [documentation](https://coolseqtool.readthedocs.io/stable/contributing.html) contains guidance for submitting feedback and contributing new code.
@@ -0,0 +1,24 @@
1
+ cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
2
+ cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
3
+ cool_seq_tool/schemas.py,sha256=HInmKpsujybVR6pRmkKNOIzPCBqk9Ni5q1ZKNFtip50,3945
4
+ cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
5
+ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
+ cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
7
+ cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
8
+ cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=hfzfuxsNwMvj6y9thwWCj4WcOXamdnqvvd29gmX19Bo,48261
10
+ cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
11
+ cool_seq_tool/mappers/mane_transcript.py,sha256=nirxlf3EGVInFYG4fsAqiEmDdTc_h1XuPyX2ul-a7Rk,54368
12
+ cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
13
+ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
14
+ cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
15
+ cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
16
+ cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=E_pj7FEBcB6HUR8yhSVibB0beMMlKJ62pK0qvl4y5nw,5358
18
+ cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
+ cool_seq_tool/sources/uta_database.py,sha256=gc5wsKOIhvzhwFmPmqOY0hhaVfRkRSzYNa9tpBt81_U,35017
20
+ cool_seq_tool-0.7.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.7.0.dist-info/METADATA,sha256=UrSjQTJOgl4sqFvMG_p_TpeZW2R0GE6lMGus9NQhUew,6226
22
+ cool_seq_tool-0.7.0.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
23
+ cool_seq_tool-0.7.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.7.0.dist-info/RECORD,,