cool-seq-tool 0.4.0.dev2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. cool_seq_tool/__init__.py +1 -3
  2. cool_seq_tool/api.py +1 -2
  3. cool_seq_tool/app.py +42 -24
  4. cool_seq_tool/handlers/__init__.py +1 -0
  5. cool_seq_tool/handlers/seqrepo_access.py +13 -15
  6. cool_seq_tool/mappers/__init__.py +1 -0
  7. cool_seq_tool/mappers/alignment.py +5 -6
  8. cool_seq_tool/mappers/exon_genomic_coords.py +232 -68
  9. cool_seq_tool/mappers/mane_transcript.py +84 -86
  10. cool_seq_tool/resources/__init__.py +1 -0
  11. cool_seq_tool/resources/data_files.py +93 -0
  12. cool_seq_tool/resources/status.py +151 -0
  13. cool_seq_tool/routers/__init__.py +1 -0
  14. cool_seq_tool/routers/default.py +1 -0
  15. cool_seq_tool/routers/mane.py +4 -4
  16. cool_seq_tool/routers/mappings.py +2 -2
  17. cool_seq_tool/schemas.py +83 -37
  18. cool_seq_tool/sources/__init__.py +1 -0
  19. cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
  20. cool_seq_tool/sources/transcript_mappings.py +41 -32
  21. cool_seq_tool/sources/uta_database.py +120 -69
  22. cool_seq_tool/utils.py +2 -2
  23. cool_seq_tool/version.py +2 -1
  24. {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/LICENSE +1 -1
  25. {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/METADATA +15 -8
  26. cool_seq_tool-0.4.1.dist-info/RECORD +29 -0
  27. {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/WHEEL +1 -1
  28. cool_seq_tool/data/__init__.py +0 -2
  29. cool_seq_tool/data/data_downloads.py +0 -89
  30. cool_seq_tool/paths.py +0 -28
  31. cool_seq_tool-0.4.0.dev2.dist-info/RECORD +0 -29
  32. /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
  33. {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,10 @@
1
1
  """Provide transcript lookup and metadata tools via the UTA database."""
2
+
2
3
  import ast
3
4
  import base64
4
5
  import logging
5
6
  from os import environ
6
- from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
7
+ from typing import Any, Literal, TypeVar
7
8
  from urllib.parse import ParseResult as UrlLibParseResult
8
9
  from urllib.parse import quote, unquote, urlparse
9
10
 
@@ -24,12 +25,43 @@ LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
24
25
  LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
25
26
 
26
27
  UTA_DB_URL = environ.get(
27
- "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5433/uta/uta_20210129b"
28
+ "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20210129b"
28
29
  )
29
30
 
30
31
  logger = logging.getLogger(__name__)
31
32
 
32
33
 
34
+ def get_liftover(
35
+ chain_file_37_to_38: str | None = None, chain_file_38_to_37: str | None = None
36
+ ) -> tuple[Converter, Converter]:
37
+ """Fetch Converter instances between GRCh37 and 38.
38
+
39
+ Factored out of the UTA Database initialization method to support less expensive
40
+ status check-type operations.
41
+
42
+ :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
43
+ This is used for ``agct``. If this is not provided, will check to see
44
+ if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
45
+ allow ``agct`` to download a chain file from UCSC
46
+ :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
47
+ This is used for ``agct``. If this is not provided, will check to see
48
+ if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
49
+ allow ``agct`` to download a chain file from UCSC
50
+ :return: converters (37->38, 38->37)
51
+ """
52
+ chain_file_37_to_38 = chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38
53
+ if chain_file_37_to_38:
54
+ converter_37_to_38 = Converter(chainfile=chain_file_37_to_38)
55
+ else:
56
+ converter_37_to_38 = Converter(from_db=Genome.HG19, to_db=Genome.HG38)
57
+ chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37
58
+ if chain_file_38_to_37:
59
+ converter_38_to_37 = Converter(chainfile=chain_file_38_to_37)
60
+ else:
61
+ converter_38_to_37 = Converter(from_db=Genome.HG38, to_db=Genome.HG19)
62
+ return (converter_37_to_38, converter_38_to_37)
63
+
64
+
33
65
  class UtaDatabase:
34
66
  """Provide transcript lookup and metadata tools via the Universal Transcript Archive
35
67
  (UTA) database.
@@ -46,8 +78,8 @@ class UtaDatabase:
46
78
  def __init__(
47
79
  self,
48
80
  db_url: str = UTA_DB_URL,
49
- chain_file_37_to_38: Optional[str] = None,
50
- chain_file_38_to_37: Optional[str] = None,
81
+ chain_file_37_to_38: str | None = None,
82
+ chain_file_38_to_37: str | None = None,
51
83
  ) -> None:
52
84
  """Initialize DB class. Should only be used by ``create()`` method, and not
53
85
  be called directly by a user.
@@ -68,20 +100,11 @@ class UtaDatabase:
68
100
  original_pwd = db_url.split("//")[-1].split("@")[0].split(":")[-1]
69
101
  self.db_url = db_url.replace(original_pwd, quote(original_pwd))
70
102
  self.args = self._get_conn_args()
103
+ self.liftover_37_to_38, self.liftover_38_to_37 = get_liftover(
104
+ chain_file_37_to_38, chain_file_38_to_37
105
+ )
71
106
 
72
- chain_file_37_to_38 = chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38
73
- if chain_file_37_to_38:
74
- self.liftover_37_to_38 = Converter(chainfile=chain_file_37_to_38)
75
- else:
76
- self.liftover_37_to_38 = Converter(from_db=Genome.HG19, to_db=Genome.HG38)
77
-
78
- chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37
79
- if chain_file_38_to_37:
80
- self.liftover_38_to_37 = Converter(chainfile=chain_file_38_to_37)
81
- else:
82
- self.liftover_38_to_37 = Converter(from_db=Genome.HG38, to_db=Genome.HG19)
83
-
84
- def _get_conn_args(self) -> Dict:
107
+ def _get_conn_args(self) -> dict:
85
108
  """Return connection arguments.
86
109
 
87
110
  :param db_url: raw connection URL
@@ -99,9 +122,9 @@ class UtaDatabase:
99
122
  self.schema = schema
100
123
 
101
124
  environ["PGPASSWORD"] = password
102
- environ[
103
- "UTA_DB_URL"
104
- ] = f"postgresql://{username}@{host}:{port}/{database}/{schema}"
125
+ environ["UTA_DB_URL"] = (
126
+ f"postgresql://{username}@{host}:{port}/{database}/{schema}"
127
+ )
105
128
  return {
106
129
  "host": host,
107
130
  "port": int(port),
@@ -145,7 +168,7 @@ class UtaDatabase:
145
168
 
146
169
  @classmethod
147
170
  async def create(
148
- cls: Type[UTADatabaseType], db_url: str = UTA_DB_URL
171
+ cls: type[UTADatabaseType], db_url: str = UTA_DB_URL
149
172
  ) -> UTADatabaseType:
150
173
  """Manufacture a fully-initialized class instance (a la factory pattern). This
151
174
  method should be used instead of calling the class directly to create a new
@@ -173,7 +196,10 @@ class UtaDatabase:
173
196
  """
174
197
 
175
198
  async def _execute_query(q: str) -> Any: # noqa: ANN401
176
- async with self._connection_pool.acquire() as connection, connection.transaction():
199
+ async with (
200
+ self._connection_pool.acquire() as connection,
201
+ connection.transaction(),
202
+ ):
177
203
  return await connection.fetch(q)
178
204
 
179
205
  if not self._connection_pool:
@@ -234,25 +260,22 @@ class UtaDatabase:
234
260
  await self.execute_query(create_index)
235
261
 
236
262
  @staticmethod
237
- def _transform_list(li: List) -> List[List[Any]]:
263
+ def _transform_list(li: list) -> list[list[Any]]:
238
264
  """Transform list to only contain field values
239
265
 
240
266
  :param li: List of asyncpg.Record objects
241
267
  :return: List of list of objects
242
268
  """
243
- results = []
244
- for item in li:
245
- results.append(list(item))
246
- return results
269
+ return [list(i) for i in li]
247
270
 
248
271
  async def get_genes_and_alt_acs(
249
272
  self,
250
273
  pos: int,
251
- strand: Optional[Strand] = None,
252
- chromosome: Optional[int] = None,
253
- alt_ac: Optional[str] = None,
254
- gene: Optional[str] = None,
255
- ) -> Tuple[Optional[Dict], Optional[str]]:
274
+ strand: Strand | None = None,
275
+ chromosome: int | None = None,
276
+ alt_ac: str | None = None,
277
+ gene: str | None = None,
278
+ ) -> tuple[dict | None, str | None]:
256
279
  """Return genes and genomic accessions for a position on a chromosome or alt_ac
257
280
 
258
281
  :param pos: Genomic position
@@ -309,8 +332,8 @@ class UtaDatabase:
309
332
  return {"genes": genes, "alt_acs": alt_acs}, None
310
333
 
311
334
  async def get_tx_exons(
312
- self, tx_ac: str, alt_ac: Optional[str] = None
313
- ) -> Tuple[Optional[List[Tuple[int, int]]], Optional[str]]:
335
+ self, tx_ac: str, alt_ac: str | None = None
336
+ ) -> tuple[list[tuple[int, int]] | None, str | None]:
314
337
  """Get list of transcript exons start/end coordinates.
315
338
 
316
339
  :param tx_ac: Transcript accession
@@ -348,9 +371,39 @@ class UtaDatabase:
348
371
  tx_exons = [(r["tx_start_i"], r["tx_end_i"]) for r in result]
349
372
  return tx_exons, None
350
373
 
374
+ async def get_tx_exons_genomic_coords(
375
+ self,
376
+ tx_ac: str,
377
+ alt_ac: str,
378
+ ) -> tuple[tuple[int, int, int, int, int] | None, str | None]:
379
+ """Get exon number, transcript coordinates, and genomic coordinates
380
+
381
+ :param tx_ac: Transcript accession
382
+ :param alt_ac: RefSeq genomic accession
383
+ :return: Tuple of exon numbers, transcript and genomic coordinates,
384
+ and warnings if found
385
+ """
386
+ query = f"""
387
+ SELECT DISTINCT ord, tx_start_i, tx_end_i, alt_start_i, alt_end_i
388
+ FROM {self.schema}.tx_exon_aln_v
389
+ WHERE tx_ac = '{tx_ac}'
390
+ AND alt_ac = '{alt_ac}'
391
+ """ # noqa: S608
392
+ result = await self.execute_query(query)
393
+
394
+ if not result:
395
+ msg = f"Unable to get exons and genomic coordinates for {tx_ac} on {alt_ac}"
396
+ logger.warning(msg)
397
+ return None, msg
398
+ tx_exons_genomic_coords = [
399
+ (r["ord"], r["tx_start_i"], r["tx_end_i"], r["alt_start_i"], r["alt_end_i"])
400
+ for r in result
401
+ ]
402
+ return tx_exons_genomic_coords, None
403
+
351
404
  async def get_alt_ac_start_or_end(
352
- self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: Optional[str]
353
- ) -> Tuple[Optional[Tuple[str, str, int, int, int]], Optional[str]]:
405
+ self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: str | None
406
+ ) -> tuple[tuple[str, str, int, int, int] | None, str | None]:
354
407
  """Get genomic data for related transcript exon start or end.
355
408
 
356
409
  :param tx_ac: Transcript accession
@@ -390,7 +443,7 @@ class UtaDatabase:
390
443
  result = result[0]
391
444
  return (result[0], result[1], result[2], result[3], result[4]), None
392
445
 
393
- async def get_cds_start_end(self, tx_ac: str) -> Optional[Tuple[int, int]]:
446
+ async def get_cds_start_end(self, tx_ac: str) -> tuple[int, int] | None:
394
447
  """Get coding start and end site
395
448
 
396
449
  :param tx_ac: Transcript accession
@@ -414,7 +467,7 @@ class UtaDatabase:
414
467
  )
415
468
  return None
416
469
 
417
- async def get_newest_assembly_ac(self, ac: str) -> List[str]:
470
+ async def get_newest_assembly_ac(self, ac: str) -> list[str]:
418
471
  """Find accession associated to latest genomic assembly
419
472
 
420
473
  :param ac: Accession
@@ -459,7 +512,7 @@ class UtaDatabase:
459
512
  result = await self.execute_query(query)
460
513
  return result[0][0]
461
514
 
462
- async def get_ac_descr(self, ac: str) -> Optional[str]:
515
+ async def get_ac_descr(self, ac: str) -> str | None:
463
516
  """Return accession description. This is typically available only for accessions
464
517
  from older (pre-GRCh38) builds.
465
518
 
@@ -494,10 +547,10 @@ class UtaDatabase:
494
547
  tx_ac: str,
495
548
  start_pos: int,
496
549
  end_pos: int,
497
- alt_ac: Optional[str] = None,
550
+ alt_ac: str | None = None,
498
551
  use_tx_pos: bool = True,
499
552
  like_tx_ac: bool = False,
500
- ) -> List:
553
+ ) -> list:
501
554
  """Return queried data from tx_exon_aln_v table.
502
555
 
503
556
  :param tx_ac: accession on c. coordinate
@@ -562,13 +615,10 @@ class UtaDatabase:
562
615
  temp_ac,
563
616
  alt_ac,
564
617
  )
565
- results = []
566
- for r in result:
567
- results.append(list(r))
568
- return results
618
+ return [list(r) for r in result]
569
619
 
570
620
  @staticmethod
571
- def data_from_result(result: List) -> Optional[Dict]:
621
+ def data_from_result(result: list) -> dict | None:
572
622
  """Return data found from result.
573
623
 
574
624
  :param result: Data from tx_exon_aln_v table
@@ -601,8 +651,8 @@ class UtaDatabase:
601
651
  }
602
652
 
603
653
  async def get_mane_c_genomic_data(
604
- self, ac: str, alt_ac: Optional[str], start_pos: int, end_pos: int
605
- ) -> Optional[Dict]:
654
+ self, ac: str, alt_ac: str | None, start_pos: int, end_pos: int
655
+ ) -> dict | None:
606
656
  """Get MANE transcript and genomic data. Used when going from g. to MANE c.
607
657
  representation.
608
658
 
@@ -667,13 +717,12 @@ class UtaDatabase:
667
717
  async def get_genomic_tx_data(
668
718
  self,
669
719
  tx_ac: str,
670
- pos: Tuple[int, int],
671
- annotation_layer: Union[
672
- AnnotationLayer.CDNA, AnnotationLayer.GENOMIC
673
- ] = AnnotationLayer.CDNA,
674
- alt_ac: Optional[str] = None,
720
+ pos: tuple[int, int],
721
+ annotation_layer: Literal[AnnotationLayer.CDNA]
722
+ | Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
723
+ alt_ac: str | None = None,
675
724
  target_genome_assembly: Assembly = Assembly.GRCH38,
676
- ) -> Optional[Dict]:
725
+ ) -> dict | None:
677
726
  """Get transcript mapping to genomic data.
678
727
 
679
728
  :param tx_ac: Accession on c. coordinate
@@ -730,7 +779,7 @@ class UtaDatabase:
730
779
 
731
780
  return data
732
781
 
733
- async def get_ac_from_gene(self, gene: str) -> List[str]:
782
+ async def get_ac_from_gene(self, gene: str) -> list[str]:
734
783
  """Return genomic accession(s) associated to a gene.
735
784
 
736
785
  :param gene: Gene symbol
@@ -754,14 +803,16 @@ class UtaDatabase:
754
803
 
755
804
  async def get_gene_from_ac(
756
805
  self, ac: str, start_pos: int, end_pos: int
757
- ) -> Optional[List[str]]:
806
+ ) -> list[str] | None:
758
807
  """Get gene(s) within the provided coordinate range
759
808
 
760
809
  >>> import asyncio
761
810
  >>> from cool_seq_tool.sources import UtaDatabase
762
811
  >>> async def get_gene():
763
812
  ... uta_db = await UtaDatabase.create()
764
- ... result = await uta_db.get_gene_from_ac("NC_000017.11", 43044296, 43045802)
813
+ ... result = await uta_db.get_gene_from_ac(
814
+ ... "NC_000017.11", 43044296, 43045802
815
+ ... )
765
816
  ... return result
766
817
  >>> asyncio.run(get_gene())
767
818
  ['BRCA1']
@@ -798,11 +849,11 @@ class UtaDatabase:
798
849
 
799
850
  async def get_transcripts(
800
851
  self,
801
- start_pos: Optional[int] = None,
802
- end_pos: Optional[int] = None,
803
- gene: Optional[str] = None,
852
+ start_pos: int | None = None,
853
+ end_pos: int | None = None,
854
+ gene: str | None = None,
804
855
  use_tx_pos: bool = True,
805
- alt_ac: Optional[str] = None,
856
+ alt_ac: str | None = None,
806
857
  ) -> pl.DataFrame:
807
858
  """Get transcripts for a given ``gene`` or ``alt_ac`` related to optional positions.
808
859
 
@@ -876,7 +927,7 @@ class UtaDatabase:
876
927
  results_df = results_df.unique()
877
928
  return results_df
878
929
 
879
- async def get_chr_assembly(self, ac: str) -> Optional[Tuple[str, str]]:
930
+ async def get_chr_assembly(self, ac: str) -> tuple[str, str] | None:
880
931
  """Get chromosome and assembly for NC accession if not in GRCh38.
881
932
 
882
933
  :param ac: NC accession
@@ -899,7 +950,7 @@ class UtaDatabase:
899
950
 
900
951
  return chromosome, assembly
901
952
 
902
- async def liftover_to_38(self, genomic_tx_data: Dict) -> None:
953
+ async def liftover_to_38(self, genomic_tx_data: dict) -> None:
903
954
  """Liftover genomic_tx_data to hg38 assembly.
904
955
 
905
956
  :param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
@@ -955,7 +1006,7 @@ class UtaDatabase:
955
1006
 
956
1007
  def get_liftover(
957
1008
  self, chromosome: str, pos: int, liftover_to_assembly: Assembly
958
- ) -> Optional[Tuple[str, int]]:
1009
+ ) -> tuple[str, int] | None:
959
1010
  """Get new genome assembly data for a position on a chromosome.
960
1011
 
961
1012
  :param chromosome: The chromosome number. Must be prefixed with ``chr``
@@ -982,7 +1033,7 @@ class UtaDatabase:
982
1033
 
983
1034
  def _set_liftover(
984
1035
  self,
985
- genomic_tx_data: Dict,
1036
+ genomic_tx_data: dict,
986
1037
  key: str,
987
1038
  chromosome: str,
988
1039
  liftover_to_assembly: Assembly,
@@ -1019,7 +1070,7 @@ class UtaDatabase:
1019
1070
 
1020
1071
  genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
1021
1072
 
1022
- async def p_to_c_ac(self, p_ac: str) -> List[str]:
1073
+ async def p_to_c_ac(self, p_ac: str) -> list[str]:
1023
1074
  """Return cDNA reference sequence accession from protein reference sequence
1024
1075
  accession (i.e. ``p.`` to ``c.`` in HGVS syntax)
1025
1076
 
@@ -1049,7 +1100,7 @@ class UtaDatabase:
1049
1100
 
1050
1101
  async def get_transcripts_from_genomic_pos(
1051
1102
  self, alt_ac: str, g_pos: int
1052
- ) -> List[str]:
1103
+ ) -> list[str]:
1053
1104
  """Get transcripts associated to a genomic ac and position.
1054
1105
 
1055
1106
  :param alt_ac: Genomic accession
@@ -1115,13 +1166,13 @@ class ParseResult(UrlLibParseResult):
1115
1166
  return super(ParseResult, cls).__new__(cls, *pr) # noqa: UP008
1116
1167
 
1117
1168
  @property
1118
- def database(self) -> Optional[str]:
1169
+ def database(self) -> str | None:
1119
1170
  """Create database property."""
1120
1171
  path_elems = self.path.split("/")
1121
1172
  return path_elems[1] if len(path_elems) > 1 else None
1122
1173
 
1123
1174
  @property
1124
- def schema(self) -> Optional[str]:
1175
+ def schema(self) -> str | None:
1125
1176
  """Create schema property."""
1126
1177
  path_elems = self.path.split("/")
1127
1178
  return path_elems[2] if len(path_elems) > 2 else None
cool_seq_tool/utils.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Provide a small set of general helper functions."""
2
+
2
3
  import datetime
3
4
  import logging
4
- from typing import Tuple
5
5
 
6
6
  from cool_seq_tool.schemas import ResidueMode, ServiceMeta
7
7
  from cool_seq_tool.version import __version__
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
11
11
 
12
12
  def get_inter_residue_pos(
13
13
  start_pos: int, end_pos: int, residue_mode: ResidueMode
14
- ) -> Tuple[int, int]:
14
+ ) -> tuple[int, int]:
15
15
  """Return equivalent inter-residue position.
16
16
 
17
17
  Generally, we prefer to work with inter-residue coordinates where possible. Our
cool_seq_tool/version.py CHANGED
@@ -1,2 +1,3 @@
1
1
  """Define package version."""
2
- __version__ = "0.4.0-dev2"
2
+
3
+ __version__ = "0.4.1"
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2021-2023 Wagner Lab
3
+ Copyright (c) 2021-2024 Wagner Lab
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cool_seq_tool
3
- Version: 0.4.0.dev2
3
+ Version: 0.4.1
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
7
7
 
8
- Copyright (c) 2021-2023 Wagner Lab
8
+ Copyright (c) 2021-2024 Wagner Lab
9
9
 
10
10
  Permission is hereby granted, free of charge, to any person obtaining a copy
11
11
  of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
29
- Project-URL: Documentation, https://coolseqtool.readthedocs.io/en/latest/index.html
29
+ Project-URL: Documentation, https://coolseqtool.readthedocs.io/
30
30
  Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
31
31
  Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
32
32
  Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
@@ -39,30 +39,30 @@ Classifier: Intended Audience :: Developers
39
39
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
40
  Classifier: License :: OSI Approved :: MIT License
41
41
  Classifier: Programming Language :: Python :: 3
42
- Classifier: Programming Language :: Python :: 3.8
43
- Classifier: Programming Language :: Python :: 3.9
44
42
  Classifier: Programming Language :: Python :: 3.10
45
43
  Classifier: Programming Language :: Python :: 3.11
46
- Requires-Python: >=3.8
44
+ Classifier: Programming Language :: Python :: 3.12
45
+ Requires-Python: >=3.10
47
46
  Description-Content-Type: text/markdown
48
47
  License-File: LICENSE
49
48
  Requires-Dist: asyncpg
50
49
  Requires-Dist: aiofiles
51
50
  Requires-Dist: boto3
52
51
  Requires-Dist: agct >=0.1.0-dev1
53
- Requires-Dist: polars
52
+ Requires-Dist: polars ~=1.0
54
53
  Requires-Dist: hgvs
55
54
  Requires-Dist: biocommons.seqrepo
56
55
  Requires-Dist: pydantic ==2.*
57
56
  Requires-Dist: uvicorn
58
57
  Requires-Dist: fastapi
59
58
  Requires-Dist: ga4gh.vrs
59
+ Requires-Dist: wags-tails ~=0.1.3
60
60
  Provides-Extra: dev
61
61
  Requires-Dist: pre-commit ; extra == 'dev'
62
62
  Requires-Dist: ipython ; extra == 'dev'
63
63
  Requires-Dist: ipykernel ; extra == 'dev'
64
64
  Requires-Dist: psycopg2-binary ; extra == 'dev'
65
- Requires-Dist: ruff >=0.1.14 ; extra == 'dev'
65
+ Requires-Dist: ruff ==0.5.0 ; extra == 'dev'
66
66
  Provides-Extra: docs
67
67
  Requires-Dist: sphinx ==6.1.3 ; extra == 'docs'
68
68
  Requires-Dist: sphinx-autodoc-typehints ==1.22.0 ; extra == 'docs'
@@ -81,8 +81,14 @@ Requires-Dist: mock ; extra == 'tests'
81
81
  CoolSeqTool
82
82
  </h1>
83
83
 
84
+ [![image](https://img.shields.io/pypi/v/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/l/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/pyversions/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![Actions status](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
85
+
86
+ ---
87
+
84
88
  **[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
85
89
 
90
+ ---
91
+
86
92
  ## Overview
87
93
 
88
94
  <!-- description -->
@@ -113,6 +119,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
113
119
 
114
120
  ```pycon
115
121
  >>> from cool_seq_tool.app import CoolSeqTool
122
+ >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
116
123
  >>> cst = CoolSeqTool()
117
124
  >>> result = await cst.mane_transcript.get_mane_transcript(
118
125
  ... "NP_004324.2",
@@ -0,0 +1,29 @@
1
+ cool_seq_tool/__init__.py,sha256=BTfkS0bkMtxBL4yGHc4Z7ubmNhdhY2WALfadnk8N1lw,280
2
+ cool_seq_tool/api.py,sha256=AbCmdUVH8ltwqH8k7DiVsHpujMzb6c5pyAKY12iIC0U,1210
3
+ cool_seq_tool/app.py,sha256=5dBmzTf5SeIF90y_ZyI0K6AMSKgchC33eW_ABN6D8_s,4790
4
+ cool_seq_tool/schemas.py,sha256=8xGrP0rAcKLXtZYEe_DJcNp4zapjhN0StRq8uCjoobE,16720
5
+ cool_seq_tool/utils.py,sha256=lckkyFKxMAqG79SYO3p28q6BWgEjlQP7CumE2TDP1zc,1601
6
+ cool_seq_tool/version.py,sha256=hs3N9Wl67casrrQa2sGIAcpcaUySVk4oLE7JffoQuCI,53
7
+ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
8
+ cool_seq_tool/handlers/seqrepo_access.py,sha256=JB3cg7YiV2JKa7ImJXz4WtP9XWShk9qYvhCCrZnBQ6M,8983
9
+ cool_seq_tool/mappers/__init__.py,sha256=SMSf6sPcu7mdQNuJ4Cj1mbOwFUPuMdFSf0noY4XvTxE,262
10
+ cool_seq_tool/mappers/alignment.py,sha256=6Vk4XEar54ivuH8N7oBqa9gUa8E5GjWCI9hC1HCkM18,9552
11
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=tOmo6kFGcFIRmLBQwSsIZUSiratiyACf946YKV_IU78,38544
12
+ cool_seq_tool/mappers/mane_transcript.py,sha256=RrVRUS4IqxxX-HyamNLqpQ_WVWABgiLqwmmIh92uny8,49264
13
+ cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
14
+ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
15
+ cool_seq_tool/resources/status.py,sha256=ENsLiwSxzJOLOsY5IKDM805UWbQAOV3w9s7Rv_FLAUs,5761
16
+ cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
17
+ cool_seq_tool/routers/__init__.py,sha256=7SqhLv6_mDPpK1Q0L9aykmjhCmsymFqgbSWZH8LuCW0,437
18
+ cool_seq_tool/routers/default.py,sha256=zqeQmHmfGUvV32xLbN-fUfYnK_UI1gpqIL8Eu5Y8KzY,3928
19
+ cool_seq_tool/routers/mane.py,sha256=boZKP5PH0BAcqEeTBBr9Z3EMY4lhvLLX-pJxUqjBZQ0,3508
20
+ cool_seq_tool/routers/mappings.py,sha256=UJaip0QvRfK3Lk3eVuwofUwg2XJqMV5OVY9OLcpnWS4,6061
21
+ cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
22
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=9Rd_tRCrTk9i9Urp-pMMttq4cCbIJaEJ0n8rM9y9-7I,4077
23
+ cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
24
+ cool_seq_tool/sources/uta_database.py,sha256=GJHhYbH130YJo9FIRroR8eavlbaziMwI0JVNP8IPGPM,45636
25
+ cool_seq_tool-0.4.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
26
+ cool_seq_tool-0.4.1.dist-info/METADATA,sha256=CnZwl-rVLfY6kcVkQKYsYziT19q48qHRkYFQ96-OCx0,6262
27
+ cool_seq_tool-0.4.1.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
28
+ cool_seq_tool-0.4.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
29
+ cool_seq_tool-0.4.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,2 +0,0 @@
1
- """Module for data"""
2
- from .data_downloads import DataDownload
@@ -1,89 +0,0 @@
1
- """Handle acquisition of external data."""
2
- import datetime
3
- import gzip
4
- import logging
5
- import shutil
6
- from ftplib import FTP
7
- from pathlib import Path
8
-
9
- from dateutil import parser
10
-
11
- from cool_seq_tool import APP_ROOT
12
-
13
- logger = logging.getLogger("cool_seq_tool")
14
-
15
-
16
- class DataDownload:
17
- """Manage downloadable data files. Responsible for checking if files are available
18
- under expected locations, and fetching them if not.
19
-
20
- Relevant methods are called automatically by data classes; users should not have
21
- to interact with this class under normal circumstances.
22
- """
23
-
24
- def __init__(self) -> None:
25
- """Initialize downloadable data locations."""
26
- self._data_dir = APP_ROOT / "data"
27
-
28
- def get_mane_summary(self) -> Path:
29
- """Identify latest MANE summary data. If unavailable locally, download from
30
- `NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_.
31
-
32
- :return: path to MANE summary file
33
- """
34
- with FTP("ftp.ncbi.nlm.nih.gov") as ftp:
35
- ftp.login()
36
- ftp.cwd("/refseq/MANE/MANE_human/current")
37
- files = ftp.nlst()
38
- mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
39
- if not mane_summary_file:
40
- msg = "Unable to download MANE summary data"
41
- raise Exception(msg)
42
- mane_summary_file = mane_summary_file[0]
43
- self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
44
- mane_data_path = self._data_dir / mane_summary_file
45
- if not self._mane_summary_path.exists():
46
- logger.info("Downloading MANE summary file from NCBI.")
47
- with mane_data_path.open("wb") as fp:
48
- ftp.retrbinary(f"RETR {mane_summary_file}", fp.write)
49
- with gzip.open(
50
- mane_data_path, "rb"
51
- ) as f_in, self._mane_summary_path.open("wb") as f_out:
52
- shutil.copyfileobj(f_in, f_out)
53
- mane_data_path.unlink()
54
- logger.info("MANE summary file download complete.")
55
- return self._mane_summary_path
56
-
57
- def get_lrg_refseq_gene_data(self) -> Path:
58
- """Identify latest LRG RefSeq Gene file. If unavailable locally, download from
59
- `NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/>`_.
60
-
61
- :return: path to acquired LRG RefSeq Gene data file
62
- """
63
- with FTP("ftp.ncbi.nlm.nih.gov") as ftp:
64
- ftp.login()
65
- lrg_refseqgene_file = "LRG_RefSeqGene"
66
- ftp_dir_path = "/refseq/H_sapiens/RefSeqGene/"
67
- ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
68
- timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
69
- date = str(parser.parse(timestamp)).split()[0]
70
- version = (
71
- datetime.datetime.strptime(date, "%Y-%m-%d")
72
- .astimezone(tz=datetime.timezone.utc)
73
- .strftime("%Y%m%d")
74
- )
75
- fn_versioned = f"{lrg_refseqgene_file}_{version}"
76
- lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
77
- self._lrg_refseqgene_path = self._data_dir / fn_versioned
78
- if not self._lrg_refseqgene_path.exists():
79
- logger.info("Downloading LRG RefSeq data from NCBI.")
80
- ftp.cwd(ftp_dir_path)
81
- with lrg_refseqgene_path.open("wb") as fp:
82
- ftp.retrbinary(f"RETR {lrg_refseqgene_file}", fp.write)
83
- with lrg_refseqgene_path.open(
84
- "rb"
85
- ) as f_in, self._lrg_refseqgene_path.open("wb") as f_out:
86
- shutil.copyfileobj(f_in, f_out)
87
- lrg_refseqgene_path.unlink()
88
- logger.info("LRG RefSeq data download complete.")
89
- return self._lrg_refseqgene_path