cool-seq-tool 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +7 -9
- cool_seq_tool/app.py +6 -1
- cool_seq_tool/handlers/seqrepo_access.py +14 -10
- cool_seq_tool/mappers/__init__.py +2 -1
- cool_seq_tool/mappers/exon_genomic_coords.py +65 -52
- cool_seq_tool/mappers/liftover.py +90 -0
- cool_seq_tool/mappers/mane_transcript.py +124 -27
- cool_seq_tool/resources/status.py +7 -5
- cool_seq_tool/schemas.py +9 -17
- cool_seq_tool/sources/mane_transcript_mappings.py +2 -2
- cool_seq_tool/sources/uta_database.py +45 -219
- cool_seq_tool/utils.py +42 -2
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.1.dist-info}/METADATA +8 -10
- cool_seq_tool-0.5.1.dist-info/RECORD +24 -0
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.1.dist-info}/WHEEL +1 -1
- cool_seq_tool/api.py +0 -41
- cool_seq_tool/routers/__init__.py +0 -17
- cool_seq_tool/routers/default.py +0 -126
- cool_seq_tool/routers/mane.py +0 -98
- cool_seq_tool/routers/mappings.py +0 -155
- cool_seq_tool/version.py +0 -3
- cool_seq_tool-0.4.1.dist-info/RECORD +0 -29
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.1.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Provide transcript lookup and metadata tools via the UTA database."""
|
2
2
|
|
3
3
|
import ast
|
4
|
-
import base64
|
5
4
|
import logging
|
6
5
|
from os import environ
|
7
6
|
from typing import Any, Literal, TypeVar
|
@@ -11,7 +10,6 @@ from urllib.parse import quote, unquote, urlparse
|
|
11
10
|
import asyncpg
|
12
11
|
import boto3
|
13
12
|
import polars as pl
|
14
|
-
from agct import Converter, Genome
|
15
13
|
from asyncpg.exceptions import InterfaceError, InvalidAuthorizationSpecificationError
|
16
14
|
from botocore.exceptions import ClientError
|
17
15
|
|
@@ -20,46 +18,11 @@ from cool_seq_tool.schemas import AnnotationLayer, Assembly, Strand
|
|
20
18
|
# use `bound` to upper-bound UtaDatabase or child classes
|
21
19
|
UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")
|
22
20
|
|
23
|
-
# Environment variables for paths to chain files for agct
|
24
|
-
LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
|
25
|
-
LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
|
26
|
-
|
27
21
|
UTA_DB_URL = environ.get(
|
28
22
|
"UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20210129b"
|
29
23
|
)
|
30
24
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
def get_liftover(
|
35
|
-
chain_file_37_to_38: str | None = None, chain_file_38_to_37: str | None = None
|
36
|
-
) -> tuple[Converter, Converter]:
|
37
|
-
"""Fetch Converter instances between GRCh37 and 38.
|
38
|
-
|
39
|
-
Factored out of the UTA Database initialization method to support less expensive
|
40
|
-
status check-type operations.
|
41
|
-
|
42
|
-
:param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
|
43
|
-
This is used for ``agct``. If this is not provided, will check to see
|
44
|
-
if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
|
45
|
-
allow ``agct`` to download a chain file from UCSC
|
46
|
-
:param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
|
47
|
-
This is used for ``agct``. If this is not provided, will check to see
|
48
|
-
if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
|
49
|
-
allow ``agct`` to download a chain file from UCSC
|
50
|
-
:return: converters (37->38, 38->37)
|
51
|
-
"""
|
52
|
-
chain_file_37_to_38 = chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38
|
53
|
-
if chain_file_37_to_38:
|
54
|
-
converter_37_to_38 = Converter(chainfile=chain_file_37_to_38)
|
55
|
-
else:
|
56
|
-
converter_37_to_38 = Converter(from_db=Genome.HG19, to_db=Genome.HG38)
|
57
|
-
chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37
|
58
|
-
if chain_file_38_to_37:
|
59
|
-
converter_38_to_37 = Converter(chainfile=chain_file_38_to_37)
|
60
|
-
else:
|
61
|
-
converter_38_to_37 = Converter(from_db=Genome.HG38, to_db=Genome.HG19)
|
62
|
-
return (converter_37_to_38, converter_38_to_37)
|
25
|
+
_logger = logging.getLogger(__name__)
|
63
26
|
|
64
27
|
|
65
28
|
class UtaDatabase:
|
@@ -75,34 +38,18 @@ class UtaDatabase:
|
|
75
38
|
>>> uta_db = asyncio.run(UtaDatabase.create())
|
76
39
|
"""
|
77
40
|
|
78
|
-
def __init__(
|
79
|
-
self,
|
80
|
-
db_url: str = UTA_DB_URL,
|
81
|
-
chain_file_37_to_38: str | None = None,
|
82
|
-
chain_file_38_to_37: str | None = None,
|
83
|
-
) -> None:
|
41
|
+
def __init__(self, db_url: str = UTA_DB_URL) -> None:
|
84
42
|
"""Initialize DB class. Should only be used by ``create()`` method, and not
|
85
43
|
be called directly by a user.
|
86
44
|
|
87
45
|
:param db_url: PostgreSQL connection URL
|
88
46
|
Format: ``driver://user:password@host/database/schema``
|
89
|
-
:param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
|
90
|
-
This is used for ``agct``. If this is not provided, will check to see
|
91
|
-
if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
|
92
|
-
allow ``agct`` to download a chain file from UCSC
|
93
|
-
:param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
|
94
|
-
This is used for ``agct``. If this is not provided, will check to see
|
95
|
-
if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
|
96
|
-
allow ``agct`` to download a chain file from UCSC
|
97
47
|
"""
|
98
48
|
self.schema = None
|
99
49
|
self._connection_pool = None
|
100
50
|
original_pwd = db_url.split("//")[-1].split("@")[0].split(":")[-1]
|
101
51
|
self.db_url = db_url.replace(original_pwd, quote(original_pwd))
|
102
52
|
self.args = self._get_conn_args()
|
103
|
-
self.liftover_37_to_38, self.liftover_38_to_37 = get_liftover(
|
104
|
-
chain_file_37_to_38, chain_file_38_to_37
|
105
|
-
)
|
106
53
|
|
107
54
|
def _get_conn_args(self) -> dict:
|
108
55
|
"""Return connection arguments.
|
@@ -160,7 +107,7 @@ class UtaDatabase:
|
|
160
107
|
database=self.args["database"],
|
161
108
|
)
|
162
109
|
except InterfaceError as e:
|
163
|
-
|
110
|
+
_logger.error(
|
164
111
|
"While creating connection pool, encountered exception %s", e
|
165
112
|
)
|
166
113
|
msg = "Could not create connection pool"
|
@@ -223,7 +170,7 @@ class UtaDatabase:
|
|
223
170
|
genomic_table_exists = await self.execute_query(check_table_exists)
|
224
171
|
genomic_table_exists = genomic_table_exists[0].get("exists")
|
225
172
|
if genomic_table_exists is None:
|
226
|
-
|
173
|
+
_logger.critical(
|
227
174
|
"SELECT EXISTS query in UtaDatabase._create_genomic_table "
|
228
175
|
"returned invalid response"
|
229
176
|
)
|
@@ -284,7 +231,7 @@ class UtaDatabase:
|
|
284
231
|
(i.e. ``1`` or ``X``). If not provided, must provide ``alt_ac``.
|
285
232
|
If ``alt_ac`` is also provided, ``alt_ac`` will be used.
|
286
233
|
:param alt_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
|
287
|
-
must provide ``chromosome
|
234
|
+
must provide ``chromosome``. If ``chromosome`` is also provided, ``alt_ac``
|
288
235
|
will be used.
|
289
236
|
:param gene: Gene symbol
|
290
237
|
:return: Dictionary containing genes and genomic accessions and warnings if found
|
@@ -366,7 +313,7 @@ class UtaDatabase:
|
|
366
313
|
|
367
314
|
if not result:
|
368
315
|
msg = f"Unable to get exons for {tx_ac}"
|
369
|
-
|
316
|
+
_logger.warning(msg)
|
370
317
|
return None, msg
|
371
318
|
tx_exons = [(r["tx_start_i"], r["tx_end_i"]) for r in result]
|
372
319
|
return tx_exons, None
|
@@ -393,7 +340,7 @@ class UtaDatabase:
|
|
393
340
|
|
394
341
|
if not result:
|
395
342
|
msg = f"Unable to get exons and genomic coordinates for {tx_ac} on {alt_ac}"
|
396
|
-
|
343
|
+
_logger.warning(msg)
|
397
344
|
return None, msg
|
398
345
|
tx_exons_genomic_coords = [
|
399
346
|
(r["ord"], r["tx_start_i"], r["tx_end_i"], r["alt_start_i"], r["alt_end_i"])
|
@@ -438,7 +385,7 @@ class UtaDatabase:
|
|
438
385
|
)
|
439
386
|
if gene_query:
|
440
387
|
msg += f" on gene {gene}"
|
441
|
-
|
388
|
+
_logger.warning(msg)
|
442
389
|
return None, msg
|
443
390
|
result = result[0]
|
444
391
|
return (result[0], result[1], result[2], result[3], result[4]), None
|
@@ -462,7 +409,7 @@ class UtaDatabase:
|
|
462
409
|
if cds_start_end[0] is not None and cds_start_end[1] is not None: # noqa: RET503
|
463
410
|
return cds_start_end[0], cds_start_end[1]
|
464
411
|
else:
|
465
|
-
|
412
|
+
_logger.warning(
|
466
413
|
"Unable to get coding start/end site for accession: %s", tx_ac
|
467
414
|
)
|
468
415
|
return None
|
@@ -535,7 +482,7 @@ class UtaDatabase:
|
|
535
482
|
""" # noqa: S608
|
536
483
|
result = await self.execute_query(query)
|
537
484
|
if not result:
|
538
|
-
|
485
|
+
_logger.warning("Accession %s does not have a description", ac)
|
539
486
|
return None
|
540
487
|
result = result[0][0]
|
541
488
|
if result == "":
|
@@ -607,10 +554,10 @@ class UtaDatabase:
|
|
607
554
|
""" # noqa: S608
|
608
555
|
result = await self.execute_query(query)
|
609
556
|
if not result:
|
610
|
-
|
557
|
+
_logger.warning("Unable to find transcript alignment for query: %s", query)
|
611
558
|
return []
|
612
559
|
if alt_ac and not use_tx_pos and len(result) > 1:
|
613
|
-
|
560
|
+
_logger.debug(
|
614
561
|
"Found more than one match for tx_ac %s and alt_ac = %s",
|
615
562
|
temp_ac,
|
616
563
|
alt_ac,
|
@@ -633,7 +580,7 @@ class UtaDatabase:
|
|
633
580
|
alt_exon_id = result[10]
|
634
581
|
|
635
582
|
if (tx_pos_range[1] - tx_pos_range[0]) != (alt_pos_range[1] - alt_pos_range[0]):
|
636
|
-
|
583
|
+
_logger.warning(
|
637
584
|
"tx_pos_range %s is not the same length as alt_pos_range %s.",
|
638
585
|
tx_pos_range,
|
639
586
|
alt_pos_range,
|
@@ -691,7 +638,7 @@ class UtaDatabase:
|
|
691
638
|
|
692
639
|
coding_start_site = await self.get_cds_start_end(ac)
|
693
640
|
if coding_start_site is None:
|
694
|
-
|
641
|
+
_logger.warning("Accession %s not found in UTA", ac)
|
695
642
|
return None
|
696
643
|
|
697
644
|
data["tx_ac"] = result[1]
|
@@ -833,12 +780,12 @@ class UtaDatabase:
|
|
833
780
|
""" # noqa: S608
|
834
781
|
results = await self.execute_query(query)
|
835
782
|
if not results:
|
836
|
-
|
783
|
+
_logger.warning(
|
837
784
|
"Unable to find gene between %s and %s on %s", start_pos, end_pos, ac
|
838
785
|
)
|
839
786
|
return None
|
840
787
|
if len(results) > 1:
|
841
|
-
|
788
|
+
_logger.info(
|
842
789
|
"Found more than one gene between %s and %s on %s",
|
843
790
|
start_pos,
|
844
791
|
end_pos,
|
@@ -922,16 +869,26 @@ class UtaDatabase:
|
|
922
869
|
results = [
|
923
870
|
(r["pro_ac"], r["tx_ac"], r["alt_ac"], r["cds_start_i"]) for r in results
|
924
871
|
]
|
925
|
-
results_df = pl.DataFrame(results, schema=schema)
|
872
|
+
results_df = pl.DataFrame(results, schema=schema, orient="row")
|
926
873
|
if results:
|
927
874
|
results_df = results_df.unique()
|
928
875
|
return results_df
|
929
876
|
|
930
|
-
async def get_chr_assembly(self, ac: str) -> tuple[str,
|
877
|
+
async def get_chr_assembly(self, ac: str) -> tuple[str, Assembly] | None:
|
931
878
|
"""Get chromosome and assembly for NC accession if not in GRCh38.
|
932
879
|
|
933
|
-
|
934
|
-
|
880
|
+
>>> import asyncio
|
881
|
+
>>> from cool_seq_tool.sources.uta_database import UtaDatabase
|
882
|
+
>>> uta_db = asyncio.run(UtaDatabase.create())
|
883
|
+
>>> result = asyncio.run(uta_db.get_chr_assembly("NC_000007.13"))
|
884
|
+
>>> result
|
885
|
+
('chr7', <Assembly.GRCH37: 'GRCh37'>)
|
886
|
+
|
887
|
+
Returns ``None`` if unable to find (either unrecognized/invalid, or
|
888
|
+
a GRCh38 accession).
|
889
|
+
|
890
|
+
:param ac: RefSeq NC accession, eg ``"NC_000007.13"``
|
891
|
+
:return: Chromosome and assembly that accession is on, if available.
|
935
892
|
"""
|
936
893
|
descr = await self.get_ac_descr(ac)
|
937
894
|
if not descr:
|
@@ -941,135 +898,14 @@ class UtaDatabase:
|
|
941
898
|
chromosome = f"chr{descr[0].split()[-1]}"
|
942
899
|
assembly = f"GRCh{descr[1].split('.')[0].split('GRCh')[-1]}"
|
943
900
|
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
)
|
901
|
+
try:
|
902
|
+
assembly = Assembly(assembly)
|
903
|
+
except ValueError as e:
|
904
|
+
_logger.error(e)
|
949
905
|
return None
|
950
906
|
|
951
907
|
return chromosome, assembly
|
952
908
|
|
953
|
-
async def liftover_to_38(self, genomic_tx_data: dict) -> None:
|
954
|
-
"""Liftover genomic_tx_data to hg38 assembly.
|
955
|
-
|
956
|
-
:param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
|
957
|
-
strand
|
958
|
-
"""
|
959
|
-
descr = await self.get_chr_assembly(genomic_tx_data["alt_ac"])
|
960
|
-
if descr is None:
|
961
|
-
# already grch38
|
962
|
-
return
|
963
|
-
chromosome, _ = descr
|
964
|
-
|
965
|
-
query = f"""
|
966
|
-
SELECT DISTINCT alt_ac
|
967
|
-
FROM {self.schema}.tx_exon_aln_v
|
968
|
-
WHERE tx_ac = '{genomic_tx_data['tx_ac']}';
|
969
|
-
""" # noqa: S608
|
970
|
-
nc_acs = await self.execute_query(query)
|
971
|
-
nc_acs = [nc_ac[0] for nc_ac in nc_acs]
|
972
|
-
if nc_acs == [genomic_tx_data["alt_ac"]]:
|
973
|
-
logger.warning(
|
974
|
-
"UTA does not have GRCh38 assembly for %s",
|
975
|
-
genomic_tx_data["alt_ac"].split(".")[0],
|
976
|
-
)
|
977
|
-
return
|
978
|
-
|
979
|
-
# Get most recent assembly version position
|
980
|
-
# Liftover range
|
981
|
-
self._set_liftover(
|
982
|
-
genomic_tx_data, "alt_pos_range", chromosome, Assembly.GRCH38
|
983
|
-
)
|
984
|
-
|
985
|
-
# Liftover changes range
|
986
|
-
self._set_liftover(
|
987
|
-
genomic_tx_data, "alt_pos_change_range", chromosome, Assembly.GRCH38
|
988
|
-
)
|
989
|
-
|
990
|
-
# Change alt_ac to most recent
|
991
|
-
if genomic_tx_data["alt_ac"].startswith("EN"):
|
992
|
-
order_by_cond = "ORDER BY alt_ac DESC;"
|
993
|
-
else:
|
994
|
-
order_by_cond = """
|
995
|
-
ORDER BY CAST(SUBSTR(alt_ac, position('.' in alt_ac) + 1,
|
996
|
-
LENGTH(alt_ac)) AS INT) DESC;
|
997
|
-
"""
|
998
|
-
query = f"""
|
999
|
-
SELECT alt_ac
|
1000
|
-
FROM {self.schema}.genomic
|
1001
|
-
WHERE alt_ac LIKE '{genomic_tx_data['alt_ac'].split('.')[0]}%'
|
1002
|
-
{order_by_cond}
|
1003
|
-
""" # noqa: S608
|
1004
|
-
nc_acs = await self.execute_query(query)
|
1005
|
-
genomic_tx_data["alt_ac"] = nc_acs[0][0]
|
1006
|
-
|
1007
|
-
def get_liftover(
|
1008
|
-
self, chromosome: str, pos: int, liftover_to_assembly: Assembly
|
1009
|
-
) -> tuple[str, int] | None:
|
1010
|
-
"""Get new genome assembly data for a position on a chromosome.
|
1011
|
-
|
1012
|
-
:param chromosome: The chromosome number. Must be prefixed with ``chr``
|
1013
|
-
:param pos: Position on the chromosome
|
1014
|
-
:param liftover_to_assembly: Assembly to liftover to
|
1015
|
-
:return: Target chromosome and target position for assembly
|
1016
|
-
"""
|
1017
|
-
if not chromosome.startswith("chr"):
|
1018
|
-
logger.warning("`chromosome` must be prefixed with chr")
|
1019
|
-
return None
|
1020
|
-
|
1021
|
-
if liftover_to_assembly == Assembly.GRCH38:
|
1022
|
-
liftover = self.liftover_37_to_38.convert_coordinate(chromosome, pos)
|
1023
|
-
elif liftover_to_assembly == Assembly.GRCH37:
|
1024
|
-
liftover = self.liftover_38_to_37.convert_coordinate(chromosome, pos)
|
1025
|
-
else:
|
1026
|
-
logger.warning("%s assembly not supported", liftover_to_assembly)
|
1027
|
-
liftover = None
|
1028
|
-
|
1029
|
-
if not liftover:
|
1030
|
-
logger.warning("%s does not exist on %s", pos, chromosome)
|
1031
|
-
return None
|
1032
|
-
return liftover[0][:2]
|
1033
|
-
|
1034
|
-
def _set_liftover(
|
1035
|
-
self,
|
1036
|
-
genomic_tx_data: dict,
|
1037
|
-
key: str,
|
1038
|
-
chromosome: str,
|
1039
|
-
liftover_to_assembly: Assembly,
|
1040
|
-
) -> None:
|
1041
|
-
"""Update genomic_tx_data to have coordinates for given assembly.
|
1042
|
-
|
1043
|
-
:param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
|
1044
|
-
strand
|
1045
|
-
:param key: Key to access coordinate positions
|
1046
|
-
:param chromosome: Chromosome, must be prefixed with ``chr``
|
1047
|
-
:param liftover_to_assembly: Assembly to liftover to
|
1048
|
-
"""
|
1049
|
-
liftover_start_i = self.get_liftover(
|
1050
|
-
chromosome, genomic_tx_data[key][0], liftover_to_assembly
|
1051
|
-
)
|
1052
|
-
if liftover_start_i is None:
|
1053
|
-
logger.warning(
|
1054
|
-
"Unable to liftover position %s on %s",
|
1055
|
-
genomic_tx_data[key][0],
|
1056
|
-
chromosome,
|
1057
|
-
)
|
1058
|
-
return
|
1059
|
-
|
1060
|
-
liftover_end_i = self.get_liftover(
|
1061
|
-
chromosome, genomic_tx_data[key][1], liftover_to_assembly
|
1062
|
-
)
|
1063
|
-
if liftover_end_i is None:
|
1064
|
-
logger.warning(
|
1065
|
-
"Unable to liftover position %s on %s",
|
1066
|
-
genomic_tx_data[key][1],
|
1067
|
-
chromosome,
|
1068
|
-
)
|
1069
|
-
return
|
1070
|
-
|
1071
|
-
genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
|
1072
|
-
|
1073
909
|
async def p_to_c_ac(self, p_ac: str) -> list[str]:
|
1074
910
|
"""Return cDNA reference sequence accession from protein reference sequence
|
1075
911
|
accession (i.e. ``p.`` to ``c.`` in HGVS syntax)
|
@@ -1121,7 +957,12 @@ class UtaDatabase:
|
|
1121
957
|
|
1122
958
|
@staticmethod
|
1123
959
|
def get_secret() -> str:
|
1124
|
-
"""Get secrets for UTA DB instances. Used for deployment on AWS.
|
960
|
+
"""Get secrets for UTA DB instances. Used for deployment on AWS.
|
961
|
+
|
962
|
+
:raises ClientError: If unable to retrieve secret value due to decryption
|
963
|
+
decryption failure, internal service error, invalid parameter, invalid
|
964
|
+
request, or resource not found.
|
965
|
+
"""
|
1125
966
|
secret_name = environ["UTA_DB_SECRET"]
|
1126
967
|
region_name = "us-east-2"
|
1127
968
|
|
@@ -1132,27 +973,12 @@ class UtaDatabase:
|
|
1132
973
|
try:
|
1133
974
|
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
|
1134
975
|
except ClientError as e:
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
# An error occurred on the server side.
|
1140
|
-
"InternalServiceErrorException",
|
1141
|
-
# You provided an invalid value for a parameter.
|
1142
|
-
"InvalidParameterException",
|
1143
|
-
# You provided a parameter value that is not valid for the current state of the resource.
|
1144
|
-
"InvalidRequestException",
|
1145
|
-
# We can"t find the resource that you asked for.
|
1146
|
-
"ResourceNotFoundException",
|
1147
|
-
}:
|
1148
|
-
raise e
|
976
|
+
# For a list of exceptions thrown, see
|
977
|
+
# https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
|
978
|
+
_logger.error(e)
|
979
|
+
raise e
|
1149
980
|
else:
|
1150
|
-
|
1151
|
-
# Depending on whether the secret is a string or binary,
|
1152
|
-
# one of these fields will be populated.
|
1153
|
-
if "SecretString" in get_secret_value_response:
|
1154
|
-
return get_secret_value_response["SecretString"]
|
1155
|
-
return base64.b64decode(get_secret_value_response["SecretBinary"])
|
981
|
+
return get_secret_value_response["SecretString"]
|
1156
982
|
|
1157
983
|
|
1158
984
|
class ParseResult(UrlLibParseResult):
|
cool_seq_tool/utils.py
CHANGED
@@ -3,10 +3,12 @@
|
|
3
3
|
import datetime
|
4
4
|
import logging
|
5
5
|
|
6
|
+
from bioutils.accessions import chr22XY
|
7
|
+
|
8
|
+
from cool_seq_tool import __version__
|
6
9
|
from cool_seq_tool.schemas import ResidueMode, ServiceMeta
|
7
|
-
from cool_seq_tool.version import __version__
|
8
10
|
|
9
|
-
|
11
|
+
_logger = logging.getLogger(__name__)
|
10
12
|
|
11
13
|
|
12
14
|
def get_inter_residue_pos(
|
@@ -47,3 +49,41 @@ def service_meta() -> ServiceMeta:
|
|
47
49
|
version=__version__,
|
48
50
|
response_datetime=datetime.datetime.now(tz=datetime.timezone.utc),
|
49
51
|
)
|
52
|
+
|
53
|
+
|
54
|
+
def process_chromosome_input(chromosome: str, context: str = "") -> str:
|
55
|
+
"""Perform processing on a chromosome arg.
|
56
|
+
|
57
|
+
E.g.
|
58
|
+
|
59
|
+
>>> from cool_seq_tool.utils import process_chromosome_input
|
60
|
+
>>> process_chromosome_input("7")
|
61
|
+
'chr7'
|
62
|
+
>>> process_chromosome_input("x")
|
63
|
+
'chrX'
|
64
|
+
>>> process_chromosome_input("chr7")
|
65
|
+
'chr7'
|
66
|
+
|
67
|
+
In the future, we could also use this method to be more opinionated about legal
|
68
|
+
chromosome values, or throw exceptions in the event of invalid or unrecognized
|
69
|
+
terms.
|
70
|
+
|
71
|
+
:param chromosome: user-provided chromosome input
|
72
|
+
:param context: calling context to provide in log
|
73
|
+
:return: processed chromosome value. Idempotent -- returns original value if no
|
74
|
+
changes needed.
|
75
|
+
"""
|
76
|
+
original_chromosome_value = chromosome
|
77
|
+
if chromosome.lower().startswith("chr"):
|
78
|
+
chromosome = f"chr{chromosome[3:].upper()}"
|
79
|
+
else:
|
80
|
+
chromosome = chromosome.upper()
|
81
|
+
chromosome = chr22XY(chromosome)
|
82
|
+
if original_chromosome_value != chromosome:
|
83
|
+
_logger.warning(
|
84
|
+
"Transformed provided chromosome value from `%s` to `%s` in `%s`",
|
85
|
+
original_chromosome_value,
|
86
|
+
chromosome,
|
87
|
+
context if context else "cool_seq_tool",
|
88
|
+
)
|
89
|
+
return chromosome
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.5.1
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -31,7 +31,6 @@ Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
|
|
31
31
|
Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
|
32
32
|
Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
|
33
33
|
Classifier: Development Status :: 3 - Alpha
|
34
|
-
Classifier: Framework :: FastAPI
|
35
34
|
Classifier: Framework :: Pydantic
|
36
35
|
Classifier: Framework :: Pydantic :: 2
|
37
36
|
Classifier: Intended Audience :: Science/Research
|
@@ -53,12 +52,11 @@ Requires-Dist: polars ~=1.0
|
|
53
52
|
Requires-Dist: hgvs
|
54
53
|
Requires-Dist: biocommons.seqrepo
|
55
54
|
Requires-Dist: pydantic ==2.*
|
56
|
-
Requires-Dist: uvicorn
|
57
|
-
Requires-Dist: fastapi
|
58
55
|
Requires-Dist: ga4gh.vrs
|
59
56
|
Requires-Dist: wags-tails ~=0.1.3
|
57
|
+
Requires-Dist: bioutils
|
60
58
|
Provides-Extra: dev
|
61
|
-
Requires-Dist: pre-commit ; extra == 'dev'
|
59
|
+
Requires-Dist: pre-commit >=3.7.1 ; extra == 'dev'
|
62
60
|
Requires-Dist: ipython ; extra == 'dev'
|
63
61
|
Requires-Dist: ipykernel ; extra == 'dev'
|
64
62
|
Requires-Dist: psycopg2-binary ; extra == 'dev'
|
@@ -71,11 +69,11 @@ Requires-Dist: sphinx-copybutton ==0.5.2 ; extra == 'docs'
|
|
71
69
|
Requires-Dist: sphinxext-opengraph ==0.8.2 ; extra == 'docs'
|
72
70
|
Requires-Dist: furo ==2023.3.27 ; extra == 'docs'
|
73
71
|
Requires-Dist: sphinx-github-changelog ==1.2.1 ; extra == 'docs'
|
74
|
-
Provides-Extra:
|
75
|
-
Requires-Dist: pytest ; extra == '
|
76
|
-
Requires-Dist: pytest-cov ; extra == '
|
77
|
-
Requires-Dist: pytest-asyncio ==0.18.3 ; extra == '
|
78
|
-
Requires-Dist: mock ; extra == '
|
72
|
+
Provides-Extra: test
|
73
|
+
Requires-Dist: pytest ; extra == 'test'
|
74
|
+
Requires-Dist: pytest-cov ; extra == 'test'
|
75
|
+
Requires-Dist: pytest-asyncio ==0.18.3 ; extra == 'test'
|
76
|
+
Requires-Dist: mock ; extra == 'test'
|
79
77
|
|
80
78
|
<h1 align="center">
|
81
79
|
CoolSeqTool
|
@@ -0,0 +1,24 @@
|
|
1
|
+
cool_seq_tool/__init__.py,sha256=fJmjglvv3Ylm0khQSD-XTqdyUA5YzEiS3iB8FGTOhIs,247
|
2
|
+
cool_seq_tool/app.py,sha256=DJFcPVHQ5Ar9xdmHwrFKFMqbjDtx3L9gn84_wP63ARY,4982
|
3
|
+
cool_seq_tool/schemas.py,sha256=hZ4pStUHgCarXPFLkuGU26znC0dooVDvixO_7eO5eUQ,16301
|
4
|
+
cool_seq_tool/utils.py,sha256=mq_eGgqiILDcrtb1trMwRdsTERixuj8kDxHfgwsWsko,2914
|
5
|
+
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
|
+
cool_seq_tool/handlers/seqrepo_access.py,sha256=jKUn9mdyK0rHJk9I274N9H_B-M1m4r-hmOX7VwfjRC0,9135
|
7
|
+
cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
|
8
|
+
cool_seq_tool/mappers/alignment.py,sha256=6Vk4XEar54ivuH8N7oBqa9gUa8E5GjWCI9hC1HCkM18,9552
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=McLXZcnDLdLSKR3eHnY4xJ0iLfCmSwAwK_RQXBV1AYQ,39160
|
10
|
+
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
11
|
+
cool_seq_tool/mappers/mane_transcript.py,sha256=iNkK8mtzXPmD1BROHzJ4vipr6oBbQv_BdUmvuOGFIMA,52823
|
12
|
+
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
13
|
+
cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
|
14
|
+
cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
|
15
|
+
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
|
+
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=IQtaRWrIi3f1k0WiDtlmlfOlQQB6bTKSEAh2PHk-Lsw,4079
|
18
|
+
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
|
+
cool_seq_tool/sources/uta_database.py,sha256=TKMx_yoqWe5QVnqkZe_10x-Lp4PtKvArbMg5ufba0_Q,38353
|
20
|
+
cool_seq_tool-0.5.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.5.1.dist-info/METADATA,sha256=9GLDkcYGYGfUmhlkJ8S1bfjgbzPE2adEKy4iEwsyRnU,6210
|
22
|
+
cool_seq_tool-0.5.1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
23
|
+
cool_seq_tool-0.5.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.5.1.dist-info/RECORD,,
|
cool_seq_tool/api.py
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
"""Main application for FastAPI"""
|
2
|
-
|
3
|
-
from fastapi import FastAPI
|
4
|
-
from fastapi.openapi.utils import get_openapi
|
5
|
-
|
6
|
-
from cool_seq_tool.routers import SERVICE_NAME, default, mane, mappings
|
7
|
-
from cool_seq_tool.version import __version__
|
8
|
-
|
9
|
-
app = FastAPI(
|
10
|
-
docs_url=f"/{SERVICE_NAME}",
|
11
|
-
openapi_url=f"/{SERVICE_NAME}/openapi.json",
|
12
|
-
swagger_ui_parameters={"tryItOutEnabled": True},
|
13
|
-
)
|
14
|
-
|
15
|
-
|
16
|
-
app.include_router(default.router)
|
17
|
-
app.include_router(mane.router)
|
18
|
-
app.include_router(mappings.router)
|
19
|
-
|
20
|
-
|
21
|
-
def custom_openapi() -> dict:
|
22
|
-
"""Generate custom fields for OpenAPI response."""
|
23
|
-
if app.openapi_schema:
|
24
|
-
return app.openapi_schema
|
25
|
-
openapi_schema = get_openapi(
|
26
|
-
title="The GenomicMedLab Cool-Seq-Tool",
|
27
|
-
version=__version__,
|
28
|
-
description="Common Operations On Lots of Sequences Tool.",
|
29
|
-
routes=app.routes,
|
30
|
-
)
|
31
|
-
|
32
|
-
openapi_schema["info"]["contact"] = {
|
33
|
-
"name": "Alex H. Wagner",
|
34
|
-
"email": "Alex.Wagner@nationwidechildrens.org",
|
35
|
-
"url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
|
36
|
-
}
|
37
|
-
app.openapi_schema = openapi_schema
|
38
|
-
return app.openapi_schema
|
39
|
-
|
40
|
-
|
41
|
-
app.openapi = custom_openapi
|
@@ -1,17 +0,0 @@
|
|
1
|
-
"""Module for routers"""
|
2
|
-
|
3
|
-
from enum import Enum
|
4
|
-
|
5
|
-
from cool_seq_tool.app import CoolSeqTool
|
6
|
-
|
7
|
-
cool_seq_tool = CoolSeqTool()
|
8
|
-
SERVICE_NAME = "cool_seq_tool"
|
9
|
-
RESP_DESCR = "A response to a validly-formed query."
|
10
|
-
UNHANDLED_EXCEPTION_MSG = "Unhandled exception occurred. Check logs for more details."
|
11
|
-
|
12
|
-
|
13
|
-
class Tags(str, Enum):
|
14
|
-
"""Define tags for endpoints"""
|
15
|
-
|
16
|
-
MANE_TRANSCRIPT = "MANE Transcript"
|
17
|
-
ALIGNMENT_MAPPER = "Alignment Mapper"
|