cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. cool_seq_tool/__init__.py +1 -3
  2. cool_seq_tool/api.py +1 -2
  3. cool_seq_tool/app.py +38 -23
  4. cool_seq_tool/handlers/__init__.py +1 -0
  5. cool_seq_tool/handlers/seqrepo_access.py +13 -15
  6. cool_seq_tool/mappers/__init__.py +1 -0
  7. cool_seq_tool/mappers/alignment.py +5 -6
  8. cool_seq_tool/mappers/exon_genomic_coords.py +75 -73
  9. cool_seq_tool/mappers/mane_transcript.py +84 -86
  10. cool_seq_tool/resources/__init__.py +1 -0
  11. cool_seq_tool/resources/data_files.py +93 -0
  12. cool_seq_tool/resources/status.py +151 -0
  13. cool_seq_tool/routers/__init__.py +1 -0
  14. cool_seq_tool/routers/default.py +1 -0
  15. cool_seq_tool/routers/mane.py +4 -4
  16. cool_seq_tool/routers/mappings.py +2 -2
  17. cool_seq_tool/schemas.py +83 -37
  18. cool_seq_tool/sources/__init__.py +1 -0
  19. cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
  20. cool_seq_tool/sources/transcript_mappings.py +41 -32
  21. cool_seq_tool/sources/uta_database.py +91 -70
  22. cool_seq_tool/utils.py +2 -2
  23. cool_seq_tool/version.py +2 -1
  24. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/LICENSE +1 -1
  25. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/METADATA +15 -8
  26. cool_seq_tool-0.4.1.dist-info/RECORD +29 -0
  27. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/WHEEL +1 -1
  28. cool_seq_tool/data/__init__.py +0 -2
  29. cool_seq_tool/data/data_downloads.py +0 -89
  30. cool_seq_tool/paths.py +0 -28
  31. cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
  32. /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
  33. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,10 @@
1
1
  """Provide transcript lookup and metadata tools via the UTA database."""
2
+
2
3
  import ast
3
4
  import base64
4
5
  import logging
5
6
  from os import environ
6
- from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
7
+ from typing import Any, Literal, TypeVar
7
8
  from urllib.parse import ParseResult as UrlLibParseResult
8
9
  from urllib.parse import quote, unquote, urlparse
9
10
 
@@ -24,12 +25,43 @@ LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
24
25
  LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
25
26
 
26
27
  UTA_DB_URL = environ.get(
27
- "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5433/uta/uta_20210129b"
28
+ "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20210129b"
28
29
  )
29
30
 
30
31
  logger = logging.getLogger(__name__)
31
32
 
32
33
 
34
def get_liftover(
    chain_file_37_to_38: str | None = None, chain_file_38_to_37: str | None = None
) -> tuple[Converter, Converter]:
    """Build the two ``agct`` converters used to lift between GRCh37 and GRCh38.

    Kept as a module-level function (rather than inline in the UTA database
    initializer) so that lightweight status checks can construct converters
    without setting up a database object.

    :param chain_file_37_to_38: Optional path to the 37 -> 38 chain file. When
        omitted, the ``LIFTOVER_CHAIN_37_TO_38`` env var value is used; when that is
        also unset, the converter is built from assembly names so that ``agct`` can
        obtain the chain file itself.
    :param chain_file_38_to_37: Optional path to the 38 -> 37 chain file. When
        omitted, the ``LIFTOVER_CHAIN_38_TO_37`` env var value is used; when that is
        also unset, the converter is built from assembly names so that ``agct`` can
        obtain the chain file itself.
    :return: converters (37->38, 38->37)
    """

    def _make_converter(
        chain_path: str | None, env_default: str | None, source, target
    ) -> Converter:
        # An explicit argument wins over the environment-provided default.
        resolved = chain_path or env_default
        if not resolved:
            return Converter(from_db=source, to_db=target)
        return Converter(chainfile=resolved)

    # Construction order (37->38 first, then 38->37) matches the return order.
    forward = _make_converter(
        chain_file_37_to_38, LIFTOVER_CHAIN_37_TO_38, Genome.HG19, Genome.HG38
    )
    backward = _make_converter(
        chain_file_38_to_37, LIFTOVER_CHAIN_38_TO_37, Genome.HG38, Genome.HG19
    )
    return forward, backward
63
+
64
+
33
65
  class UtaDatabase:
34
66
  """Provide transcript lookup and metadata tools via the Universal Transcript Archive
35
67
  (UTA) database.
@@ -46,8 +78,8 @@ class UtaDatabase:
46
78
  def __init__(
47
79
  self,
48
80
  db_url: str = UTA_DB_URL,
49
- chain_file_37_to_38: Optional[str] = None,
50
- chain_file_38_to_37: Optional[str] = None,
81
+ chain_file_37_to_38: str | None = None,
82
+ chain_file_38_to_37: str | None = None,
51
83
  ) -> None:
52
84
  """Initialize DB class. Should only be used by ``create()`` method, and not
53
85
  be called directly by a user.
@@ -68,20 +100,11 @@ class UtaDatabase:
68
100
  original_pwd = db_url.split("//")[-1].split("@")[0].split(":")[-1]
69
101
  self.db_url = db_url.replace(original_pwd, quote(original_pwd))
70
102
  self.args = self._get_conn_args()
103
+ self.liftover_37_to_38, self.liftover_38_to_37 = get_liftover(
104
+ chain_file_37_to_38, chain_file_38_to_37
105
+ )
71
106
 
72
- chain_file_37_to_38 = chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38
73
- if chain_file_37_to_38:
74
- self.liftover_37_to_38 = Converter(chainfile=chain_file_37_to_38)
75
- else:
76
- self.liftover_37_to_38 = Converter(from_db=Genome.HG19, to_db=Genome.HG38)
77
-
78
- chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37
79
- if chain_file_38_to_37:
80
- self.liftover_38_to_37 = Converter(chainfile=chain_file_38_to_37)
81
- else:
82
- self.liftover_38_to_37 = Converter(from_db=Genome.HG38, to_db=Genome.HG19)
83
-
84
- def _get_conn_args(self) -> Dict:
107
+ def _get_conn_args(self) -> dict:
85
108
  """Return connection arguments.
86
109
 
87
110
  :param db_url: raw connection URL
@@ -99,9 +122,9 @@ class UtaDatabase:
99
122
  self.schema = schema
100
123
 
101
124
  environ["PGPASSWORD"] = password
102
- environ[
103
- "UTA_DB_URL"
104
- ] = f"postgresql://{username}@{host}:{port}/{database}/{schema}"
125
+ environ["UTA_DB_URL"] = (
126
+ f"postgresql://{username}@{host}:{port}/{database}/{schema}"
127
+ )
105
128
  return {
106
129
  "host": host,
107
130
  "port": int(port),
@@ -145,7 +168,7 @@ class UtaDatabase:
145
168
 
146
169
  @classmethod
147
170
  async def create(
148
- cls: Type[UTADatabaseType], db_url: str = UTA_DB_URL
171
+ cls: type[UTADatabaseType], db_url: str = UTA_DB_URL
149
172
  ) -> UTADatabaseType:
150
173
  """Manufacture a fully-initialized class instance (a la factory pattern). This
151
174
  method should be used instead of calling the class directly to create a new
@@ -173,7 +196,10 @@ class UtaDatabase:
173
196
  """
174
197
 
175
198
  async def _execute_query(q: str) -> Any: # noqa: ANN401
176
- async with self._connection_pool.acquire() as connection, connection.transaction():
199
+ async with (
200
+ self._connection_pool.acquire() as connection,
201
+ connection.transaction(),
202
+ ):
177
203
  return await connection.fetch(q)
178
204
 
179
205
  if not self._connection_pool:
@@ -234,25 +260,22 @@ class UtaDatabase:
234
260
  await self.execute_query(create_index)
235
261
 
236
262
  @staticmethod
237
- def _transform_list(li: List) -> List[List[Any]]:
263
+ def _transform_list(li: list) -> list[list[Any]]:
238
264
  """Transform list to only contain field values
239
265
 
240
266
  :param li: List of asyncpg.Record objects
241
267
  :return: List of list of objects
242
268
  """
243
- results = []
244
- for item in li:
245
- results.append(list(item))
246
- return results
269
+ return [list(i) for i in li]
247
270
 
248
271
  async def get_genes_and_alt_acs(
249
272
  self,
250
273
  pos: int,
251
- strand: Optional[Strand] = None,
252
- chromosome: Optional[int] = None,
253
- alt_ac: Optional[str] = None,
254
- gene: Optional[str] = None,
255
- ) -> Tuple[Optional[Dict], Optional[str]]:
274
+ strand: Strand | None = None,
275
+ chromosome: int | None = None,
276
+ alt_ac: str | None = None,
277
+ gene: str | None = None,
278
+ ) -> tuple[dict | None, str | None]:
256
279
  """Return genes and genomic accessions for a position on a chromosome or alt_ac
257
280
 
258
281
  :param pos: Genomic position
@@ -309,8 +332,8 @@ class UtaDatabase:
309
332
  return {"genes": genes, "alt_acs": alt_acs}, None
310
333
 
311
334
  async def get_tx_exons(
312
- self, tx_ac: str, alt_ac: Optional[str] = None
313
- ) -> Tuple[Optional[List[Tuple[int, int]]], Optional[str]]:
335
+ self, tx_ac: str, alt_ac: str | None = None
336
+ ) -> tuple[list[tuple[int, int]] | None, str | None]:
314
337
  """Get list of transcript exons start/end coordinates.
315
338
 
316
339
  :param tx_ac: Transcript accession
@@ -352,7 +375,7 @@ class UtaDatabase:
352
375
  self,
353
376
  tx_ac: str,
354
377
  alt_ac: str,
355
- ) -> Tuple[Optional[Tuple[int, int, int, int, int]], Optional[str]]:
378
+ ) -> tuple[tuple[int, int, int, int, int] | None, str | None]:
356
379
  """Get exon number, transcript coordinates, and genomic coordinates
357
380
 
358
381
  :param tx_ac: Transcript accession
@@ -379,8 +402,8 @@ class UtaDatabase:
379
402
  return tx_exons_genomic_coords, None
380
403
 
381
404
  async def get_alt_ac_start_or_end(
382
- self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: Optional[str]
383
- ) -> Tuple[Optional[Tuple[str, str, int, int, int]], Optional[str]]:
405
+ self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: str | None
406
+ ) -> tuple[tuple[str, str, int, int, int] | None, str | None]:
384
407
  """Get genomic data for related transcript exon start or end.
385
408
 
386
409
  :param tx_ac: Transcript accession
@@ -420,7 +443,7 @@ class UtaDatabase:
420
443
  result = result[0]
421
444
  return (result[0], result[1], result[2], result[3], result[4]), None
422
445
 
423
- async def get_cds_start_end(self, tx_ac: str) -> Optional[Tuple[int, int]]:
446
+ async def get_cds_start_end(self, tx_ac: str) -> tuple[int, int] | None:
424
447
  """Get coding start and end site
425
448
 
426
449
  :param tx_ac: Transcript accession
@@ -444,7 +467,7 @@ class UtaDatabase:
444
467
  )
445
468
  return None
446
469
 
447
- async def get_newest_assembly_ac(self, ac: str) -> List[str]:
470
+ async def get_newest_assembly_ac(self, ac: str) -> list[str]:
448
471
  """Find accession associated to latest genomic assembly
449
472
 
450
473
  :param ac: Accession
@@ -489,7 +512,7 @@ class UtaDatabase:
489
512
  result = await self.execute_query(query)
490
513
  return result[0][0]
491
514
 
492
- async def get_ac_descr(self, ac: str) -> Optional[str]:
515
+ async def get_ac_descr(self, ac: str) -> str | None:
493
516
  """Return accession description. This is typically available only for accessions
494
517
  from older (pre-GRCh38) builds.
495
518
 
@@ -524,10 +547,10 @@ class UtaDatabase:
524
547
  tx_ac: str,
525
548
  start_pos: int,
526
549
  end_pos: int,
527
- alt_ac: Optional[str] = None,
550
+ alt_ac: str | None = None,
528
551
  use_tx_pos: bool = True,
529
552
  like_tx_ac: bool = False,
530
- ) -> List:
553
+ ) -> list:
531
554
  """Return queried data from tx_exon_aln_v table.
532
555
 
533
556
  :param tx_ac: accession on c. coordinate
@@ -592,13 +615,10 @@ class UtaDatabase:
592
615
  temp_ac,
593
616
  alt_ac,
594
617
  )
595
- results = []
596
- for r in result:
597
- results.append(list(r))
598
- return results
618
+ return [list(r) for r in result]
599
619
 
600
620
  @staticmethod
601
- def data_from_result(result: List) -> Optional[Dict]:
621
+ def data_from_result(result: list) -> dict | None:
602
622
  """Return data found from result.
603
623
 
604
624
  :param result: Data from tx_exon_aln_v table
@@ -631,8 +651,8 @@ class UtaDatabase:
631
651
  }
632
652
 
633
653
  async def get_mane_c_genomic_data(
634
- self, ac: str, alt_ac: Optional[str], start_pos: int, end_pos: int
635
- ) -> Optional[Dict]:
654
+ self, ac: str, alt_ac: str | None, start_pos: int, end_pos: int
655
+ ) -> dict | None:
636
656
  """Get MANE transcript and genomic data. Used when going from g. to MANE c.
637
657
  representation.
638
658
 
@@ -697,13 +717,12 @@ class UtaDatabase:
697
717
  async def get_genomic_tx_data(
698
718
  self,
699
719
  tx_ac: str,
700
- pos: Tuple[int, int],
701
- annotation_layer: Union[
702
- AnnotationLayer.CDNA, AnnotationLayer.GENOMIC
703
- ] = AnnotationLayer.CDNA,
704
- alt_ac: Optional[str] = None,
720
+ pos: tuple[int, int],
721
+ annotation_layer: Literal[AnnotationLayer.CDNA]
722
+ | Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
723
+ alt_ac: str | None = None,
705
724
  target_genome_assembly: Assembly = Assembly.GRCH38,
706
- ) -> Optional[Dict]:
725
+ ) -> dict | None:
707
726
  """Get transcript mapping to genomic data.
708
727
 
709
728
  :param tx_ac: Accession on c. coordinate
@@ -760,7 +779,7 @@ class UtaDatabase:
760
779
 
761
780
  return data
762
781
 
763
- async def get_ac_from_gene(self, gene: str) -> List[str]:
782
+ async def get_ac_from_gene(self, gene: str) -> list[str]:
764
783
  """Return genomic accession(s) associated to a gene.
765
784
 
766
785
  :param gene: Gene symbol
@@ -784,14 +803,16 @@ class UtaDatabase:
784
803
 
785
804
  async def get_gene_from_ac(
786
805
  self, ac: str, start_pos: int, end_pos: int
787
- ) -> Optional[List[str]]:
806
+ ) -> list[str] | None:
788
807
  """Get gene(s) within the provided coordinate range
789
808
 
790
809
  >>> import asyncio
791
810
  >>> from cool_seq_tool.sources import UtaDatabase
792
811
  >>> async def get_gene():
793
812
  ... uta_db = await UtaDatabase.create()
794
- ... result = await uta_db.get_gene_from_ac("NC_000017.11", 43044296, 43045802)
813
+ ... result = await uta_db.get_gene_from_ac(
814
+ ... "NC_000017.11", 43044296, 43045802
815
+ ... )
795
816
  ... return result
796
817
  >>> asyncio.run(get_gene())
797
818
  ['BRCA1']
@@ -828,11 +849,11 @@ class UtaDatabase:
828
849
 
829
850
  async def get_transcripts(
830
851
  self,
831
- start_pos: Optional[int] = None,
832
- end_pos: Optional[int] = None,
833
- gene: Optional[str] = None,
852
+ start_pos: int | None = None,
853
+ end_pos: int | None = None,
854
+ gene: str | None = None,
834
855
  use_tx_pos: bool = True,
835
- alt_ac: Optional[str] = None,
856
+ alt_ac: str | None = None,
836
857
  ) -> pl.DataFrame:
837
858
  """Get transcripts for a given ``gene`` or ``alt_ac`` related to optional positions.
838
859
 
@@ -906,7 +927,7 @@ class UtaDatabase:
906
927
  results_df = results_df.unique()
907
928
  return results_df
908
929
 
909
- async def get_chr_assembly(self, ac: str) -> Optional[Tuple[str, str]]:
930
+ async def get_chr_assembly(self, ac: str) -> tuple[str, str] | None:
910
931
  """Get chromosome and assembly for NC accession if not in GRCh38.
911
932
 
912
933
  :param ac: NC accession
@@ -929,7 +950,7 @@ class UtaDatabase:
929
950
 
930
951
  return chromosome, assembly
931
952
 
932
- async def liftover_to_38(self, genomic_tx_data: Dict) -> None:
953
+ async def liftover_to_38(self, genomic_tx_data: dict) -> None:
933
954
  """Liftover genomic_tx_data to hg38 assembly.
934
955
 
935
956
  :param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
@@ -985,7 +1006,7 @@ class UtaDatabase:
985
1006
 
986
1007
  def get_liftover(
987
1008
  self, chromosome: str, pos: int, liftover_to_assembly: Assembly
988
- ) -> Optional[Tuple[str, int]]:
1009
+ ) -> tuple[str, int] | None:
989
1010
  """Get new genome assembly data for a position on a chromosome.
990
1011
 
991
1012
  :param chromosome: The chromosome number. Must be prefixed with ``chr``
@@ -1012,7 +1033,7 @@ class UtaDatabase:
1012
1033
 
1013
1034
  def _set_liftover(
1014
1035
  self,
1015
- genomic_tx_data: Dict,
1036
+ genomic_tx_data: dict,
1016
1037
  key: str,
1017
1038
  chromosome: str,
1018
1039
  liftover_to_assembly: Assembly,
@@ -1049,7 +1070,7 @@ class UtaDatabase:
1049
1070
 
1050
1071
  genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
1051
1072
 
1052
- async def p_to_c_ac(self, p_ac: str) -> List[str]:
1073
+ async def p_to_c_ac(self, p_ac: str) -> list[str]:
1053
1074
  """Return cDNA reference sequence accession from protein reference sequence
1054
1075
  accession (i.e. ``p.`` to ``c.`` in HGVS syntax)
1055
1076
 
@@ -1079,7 +1100,7 @@ class UtaDatabase:
1079
1100
 
1080
1101
  async def get_transcripts_from_genomic_pos(
1081
1102
  self, alt_ac: str, g_pos: int
1082
- ) -> List[str]:
1103
+ ) -> list[str]:
1083
1104
  """Get transcripts associated to a genomic ac and position.
1084
1105
 
1085
1106
  :param alt_ac: Genomic accession
@@ -1145,13 +1166,13 @@ class ParseResult(UrlLibParseResult):
1145
1166
  return super(ParseResult, cls).__new__(cls, *pr) # noqa: UP008
1146
1167
 
1147
1168
  @property
1148
- def database(self) -> Optional[str]:
1169
+ def database(self) -> str | None:
1149
1170
  """Create database property."""
1150
1171
  path_elems = self.path.split("/")
1151
1172
  return path_elems[1] if len(path_elems) > 1 else None
1152
1173
 
1153
1174
  @property
1154
- def schema(self) -> Optional[str]:
1175
+ def schema(self) -> str | None:
1155
1176
  """Create schema property."""
1156
1177
  path_elems = self.path.split("/")
1157
1178
  return path_elems[2] if len(path_elems) > 2 else None
cool_seq_tool/utils.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Provide a small set of general helper functions."""
2
+
2
3
  import datetime
3
4
  import logging
4
- from typing import Tuple
5
5
 
6
6
  from cool_seq_tool.schemas import ResidueMode, ServiceMeta
7
7
  from cool_seq_tool.version import __version__
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
11
11
 
12
12
  def get_inter_residue_pos(
13
13
  start_pos: int, end_pos: int, residue_mode: ResidueMode
14
- ) -> Tuple[int, int]:
14
+ ) -> tuple[int, int]:
15
15
  """Return equivalent inter-residue position.
16
16
 
17
17
  Generally, we prefer to work with inter-residue coordinates where possible. Our
cool_seq_tool/version.py CHANGED
@@ -1,2 +1,3 @@
1
1
  """Define package version."""
2
- __version__ = "0.4.0-dev3"
2
+
3
+ __version__ = "0.4.1"
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2021-2023 Wagner Lab
3
+ Copyright (c) 2021-2024 Wagner Lab
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cool_seq_tool
3
- Version: 0.4.0.dev3
3
+ Version: 0.4.1
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
7
7
 
8
- Copyright (c) 2021-2023 Wagner Lab
8
+ Copyright (c) 2021-2024 Wagner Lab
9
9
 
10
10
  Permission is hereby granted, free of charge, to any person obtaining a copy
11
11
  of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
29
- Project-URL: Documentation, https://coolseqtool.readthedocs.io/en/latest/index.html
29
+ Project-URL: Documentation, https://coolseqtool.readthedocs.io/
30
30
  Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
31
31
  Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
32
32
  Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
@@ -39,30 +39,30 @@ Classifier: Intended Audience :: Developers
39
39
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
40
  Classifier: License :: OSI Approved :: MIT License
41
41
  Classifier: Programming Language :: Python :: 3
42
- Classifier: Programming Language :: Python :: 3.8
43
- Classifier: Programming Language :: Python :: 3.9
44
42
  Classifier: Programming Language :: Python :: 3.10
45
43
  Classifier: Programming Language :: Python :: 3.11
46
- Requires-Python: >=3.8
44
+ Classifier: Programming Language :: Python :: 3.12
45
+ Requires-Python: >=3.10
47
46
  Description-Content-Type: text/markdown
48
47
  License-File: LICENSE
49
48
  Requires-Dist: asyncpg
50
49
  Requires-Dist: aiofiles
51
50
  Requires-Dist: boto3
52
51
  Requires-Dist: agct >=0.1.0-dev1
53
- Requires-Dist: polars
52
+ Requires-Dist: polars ~=1.0
54
53
  Requires-Dist: hgvs
55
54
  Requires-Dist: biocommons.seqrepo
56
55
  Requires-Dist: pydantic ==2.*
57
56
  Requires-Dist: uvicorn
58
57
  Requires-Dist: fastapi
59
58
  Requires-Dist: ga4gh.vrs
59
+ Requires-Dist: wags-tails ~=0.1.3
60
60
  Provides-Extra: dev
61
61
  Requires-Dist: pre-commit ; extra == 'dev'
62
62
  Requires-Dist: ipython ; extra == 'dev'
63
63
  Requires-Dist: ipykernel ; extra == 'dev'
64
64
  Requires-Dist: psycopg2-binary ; extra == 'dev'
65
- Requires-Dist: ruff ==0.2.0 ; extra == 'dev'
65
+ Requires-Dist: ruff ==0.5.0 ; extra == 'dev'
66
66
  Provides-Extra: docs
67
67
  Requires-Dist: sphinx ==6.1.3 ; extra == 'docs'
68
68
  Requires-Dist: sphinx-autodoc-typehints ==1.22.0 ; extra == 'docs'
@@ -81,8 +81,14 @@ Requires-Dist: mock ; extra == 'tests'
81
81
  CoolSeqTool
82
82
  </h1>
83
83
 
84
+ [![image](https://img.shields.io/pypi/v/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/l/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/pyversions/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![Actions status](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml)
85
+
86
+ ---
87
+
84
88
  **[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
85
89
 
90
+ ---
91
+
86
92
  ## Overview
87
93
 
88
94
  <!-- description -->
@@ -113,6 +119,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
113
119
 
114
120
  ```pycon
115
121
  >>> from cool_seq_tool.app import CoolSeqTool
122
+ >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
116
123
  >>> cst = CoolSeqTool()
117
124
  >>> result = await cst.mane_transcript.get_mane_transcript(
118
125
  ... "NP_004324.2",
@@ -0,0 +1,29 @@
1
+ cool_seq_tool/__init__.py,sha256=BTfkS0bkMtxBL4yGHc4Z7ubmNhdhY2WALfadnk8N1lw,280
2
+ cool_seq_tool/api.py,sha256=AbCmdUVH8ltwqH8k7DiVsHpujMzb6c5pyAKY12iIC0U,1210
3
+ cool_seq_tool/app.py,sha256=5dBmzTf5SeIF90y_ZyI0K6AMSKgchC33eW_ABN6D8_s,4790
4
+ cool_seq_tool/schemas.py,sha256=8xGrP0rAcKLXtZYEe_DJcNp4zapjhN0StRq8uCjoobE,16720
5
+ cool_seq_tool/utils.py,sha256=lckkyFKxMAqG79SYO3p28q6BWgEjlQP7CumE2TDP1zc,1601
6
+ cool_seq_tool/version.py,sha256=hs3N9Wl67casrrQa2sGIAcpcaUySVk4oLE7JffoQuCI,53
7
+ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
8
+ cool_seq_tool/handlers/seqrepo_access.py,sha256=JB3cg7YiV2JKa7ImJXz4WtP9XWShk9qYvhCCrZnBQ6M,8983
9
+ cool_seq_tool/mappers/__init__.py,sha256=SMSf6sPcu7mdQNuJ4Cj1mbOwFUPuMdFSf0noY4XvTxE,262
10
+ cool_seq_tool/mappers/alignment.py,sha256=6Vk4XEar54ivuH8N7oBqa9gUa8E5GjWCI9hC1HCkM18,9552
11
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=tOmo6kFGcFIRmLBQwSsIZUSiratiyACf946YKV_IU78,38544
12
+ cool_seq_tool/mappers/mane_transcript.py,sha256=RrVRUS4IqxxX-HyamNLqpQ_WVWABgiLqwmmIh92uny8,49264
13
+ cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
14
+ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
15
+ cool_seq_tool/resources/status.py,sha256=ENsLiwSxzJOLOsY5IKDM805UWbQAOV3w9s7Rv_FLAUs,5761
16
+ cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
17
+ cool_seq_tool/routers/__init__.py,sha256=7SqhLv6_mDPpK1Q0L9aykmjhCmsymFqgbSWZH8LuCW0,437
18
+ cool_seq_tool/routers/default.py,sha256=zqeQmHmfGUvV32xLbN-fUfYnK_UI1gpqIL8Eu5Y8KzY,3928
19
+ cool_seq_tool/routers/mane.py,sha256=boZKP5PH0BAcqEeTBBr9Z3EMY4lhvLLX-pJxUqjBZQ0,3508
20
+ cool_seq_tool/routers/mappings.py,sha256=UJaip0QvRfK3Lk3eVuwofUwg2XJqMV5OVY9OLcpnWS4,6061
21
+ cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
22
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=9Rd_tRCrTk9i9Urp-pMMttq4cCbIJaEJ0n8rM9y9-7I,4077
23
+ cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
24
+ cool_seq_tool/sources/uta_database.py,sha256=GJHhYbH130YJo9FIRroR8eavlbaziMwI0JVNP8IPGPM,45636
25
+ cool_seq_tool-0.4.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
26
+ cool_seq_tool-0.4.1.dist-info/METADATA,sha256=CnZwl-rVLfY6kcVkQKYsYziT19q48qHRkYFQ96-OCx0,6262
27
+ cool_seq_tool-0.4.1.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
28
+ cool_seq_tool-0.4.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
29
+ cool_seq_tool-0.4.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,2 +0,0 @@
1
- """Module for data"""
2
- from .data_downloads import DataDownload
@@ -1,89 +0,0 @@
1
- """Handle acquisition of external data."""
2
- import datetime
3
- import gzip
4
- import logging
5
- import shutil
6
- from ftplib import FTP
7
- from pathlib import Path
8
-
9
- from dateutil import parser
10
-
11
- from cool_seq_tool import APP_ROOT
12
-
13
- logger = logging.getLogger("cool_seq_tool")
14
-
15
-
16
class DataDownload:
    """Manage downloadable data files. Responsible for checking if files are available
    under expected locations, and fetching them if not.

    Relevant methods are called automatically by data classes; users should not have
    to interact with this class under normal circumstances.
    """

    def __init__(self) -> None:
        """Initialize downloadable data locations."""
        self._data_dir = APP_ROOT / "data"

    def get_mane_summary(self) -> Path:
        """Identify latest MANE summary data. If unavailable locally, download from
        `NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_.

        :return: path to MANE summary file
        :raise Exception: if no ``*.summary.txt.gz`` file is listed on the server
        """
        with FTP("ftp.ncbi.nlm.nih.gov") as ftp:
            ftp.login()
            ftp.cwd("/refseq/MANE/MANE_human/current")
            files = ftp.nlst()
            mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
            if not mane_summary_file:
                msg = "Unable to download MANE summary data"
                raise Exception(msg)
            mane_summary_file = mane_summary_file[0]
            # Local copy is stored decompressed, so drop the trailing ".gz".
            self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
            mane_data_path = self._data_dir / mane_summary_file
            if not self._mane_summary_path.exists():
                logger.info("Downloading MANE summary file from NCBI.")
                with mane_data_path.open("wb") as fp:
                    ftp.retrbinary(f"RETR {mane_summary_file}", fp.write)
                with gzip.open(
                    mane_data_path, "rb"
                ) as f_in, self._mane_summary_path.open("wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)
                # The compressed download is only an intermediate artifact.
                mane_data_path.unlink()
                logger.info("MANE summary file download complete.")
        return self._mane_summary_path

    def get_lrg_refseq_gene_data(self) -> Path:
        """Identify latest LRG RefSeq Gene file. If unavailable locally, download from
        `NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/>`_.

        The local file name carries the server-side modification date
        (``LRG_RefSeqGene_YYYYMMDD``), so a newer remote file triggers a new download.

        :return: path to acquired LRG RefSeq Gene data file
        """
        with FTP("ftp.ncbi.nlm.nih.gov") as ftp:
            ftp.login()
            lrg_refseqgene_file = "LRG_RefSeqGene"
            ftp_dir_path = "/refseq/H_sapiens/RefSeqGene/"
            ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
            # MDTM replies "213 YYYYMMDDHHMMSS"; strip the status-code prefix.
            timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
            # Format the server-reported date directly. Converting a naive midnight
            # with ``astimezone(utc)`` would treat it as *local* time and move the
            # date back one day on hosts east of UTC, yielding a wrong version.
            version = parser.parse(timestamp).strftime("%Y%m%d")
            fn_versioned = f"{lrg_refseqgene_file}_{version}"
            lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
            self._lrg_refseqgene_path = self._data_dir / fn_versioned
            if not self._lrg_refseqgene_path.exists():
                logger.info("Downloading LRG RefSeq data from NCBI.")
                ftp.cwd(ftp_dir_path)
                with lrg_refseqgene_path.open("wb") as fp:
                    ftp.retrbinary(f"RETR {lrg_refseqgene_file}", fp.write)
                with lrg_refseqgene_path.open(
                    "rb"
                ) as f_in, self._lrg_refseqgene_path.open("wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)
                # Keep only the versioned copy.
                lrg_refseqgene_path.unlink()
                logger.info("LRG RefSeq data download complete.")
        return self._lrg_refseqgene_path
cool_seq_tool/paths.py DELETED
@@ -1,28 +0,0 @@
1
- """Provide paths to shared files, and trigger data acquisition if unavailable."""
2
- from os import environ
3
- from pathlib import Path
4
-
5
- from cool_seq_tool.data.data_downloads import DataDownload
6
-
7
# Directory that contains this module; bundled data files live beneath it.
APP_ROOT = Path(__file__).resolve().parent

# Transcript mappings: env var override, otherwise the bundled TSV.
TRANSCRIPT_MAPPINGS_PATH = Path(
    environ.get("TRANSCRIPT_MAPPINGS_PATH", f"{APP_ROOT}/data/transcript_mapping.tsv")
)

d = DataDownload()

# MANE summary: use the user-provided path if set, otherwise acquire the file.
provided_mane_summary_path = environ.get("MANE_SUMMARY_PATH", "")
MANE_SUMMARY_PATH = (
    Path(provided_mane_summary_path)
    if provided_mane_summary_path
    else d.get_mane_summary()
)

# LRG RefSeqGene: same rule as above.
provided_lrg_refseq_path = environ.get("LRG_REFSEQGENE_PATH", "")
LRG_REFSEQGENE_PATH = (
    Path(provided_lrg_refseq_path)
    if provided_lrg_refseq_path
    else d.get_lrg_refseq_gene_data()
)


SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")