cool-seq-tool 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/__init__.py +1 -1
- cool_seq_tool/mappers/exon_genomic_coords.py +28 -13
- cool_seq_tool/resources/status.py +9 -11
- cool_seq_tool/sources/uta_database.py +11 -11
- {cool_seq_tool-0.13.0.dist-info → cool_seq_tool-0.14.0.dist-info}/METADATA +6 -5
- {cool_seq_tool-0.13.0.dist-info → cool_seq_tool-0.14.0.dist-info}/RECORD +9 -9
- {cool_seq_tool-0.13.0.dist-info → cool_seq_tool-0.14.0.dist-info}/WHEEL +1 -1
- {cool_seq_tool-0.13.0.dist-info → cool_seq_tool-0.14.0.dist-info/licenses}/LICENSE +0 -0
- {cool_seq_tool-0.13.0.dist-info → cool_seq_tool-0.14.0.dist-info}/top_level.txt +0 -0
@@ -6,4 +6,4 @@ from .mane_transcript import ManeTranscript
|
|
6
6
|
from .exon_genomic_coords import ExonGenomicCoordsMapper
|
7
7
|
|
8
8
|
|
9
|
-
__all__ = ["AlignmentMapper", "
|
9
|
+
__all__ = ["AlignmentMapper", "ExonGenomicCoordsMapper", "LiftOver", "ManeTranscript"]
|
@@ -93,6 +93,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
93
93
|
)
|
94
94
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
95
95
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
96
|
+
strand: Strand | None = Field(
|
97
|
+
None, description="The strand that the transcript accession exists on."
|
98
|
+
)
|
96
99
|
errors: list[StrictStr] = Field([], description="Error messages.")
|
97
100
|
|
98
101
|
@model_validator(mode="before")
|
@@ -121,6 +124,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
121
124
|
"gene": "TPM3",
|
122
125
|
"genomic_ac": "NC_000001.11",
|
123
126
|
"tx_ac": "NM_152263.3",
|
127
|
+
"strand": -1,
|
124
128
|
"seg": {
|
125
129
|
"exon_ord": 0,
|
126
130
|
"offset": 0,
|
@@ -147,6 +151,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
147
151
|
)
|
148
152
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
149
153
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
154
|
+
strand: Strand | None = Field(
|
155
|
+
None, description="The strand that the transcript exists on."
|
156
|
+
)
|
150
157
|
seg_start: TxSegment | None = Field(None, description="Start transcript segment.")
|
151
158
|
seg_end: TxSegment | None = Field(None, description="End transcript segment.")
|
152
159
|
errors: list[StrictStr] = Field([], description="Error messages.")
|
@@ -183,6 +190,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
183
190
|
"gene": "TPM3",
|
184
191
|
"genomic_ac": "NC_000001.11",
|
185
192
|
"tx_ac": "NM_152263.3",
|
193
|
+
"strand": -1,
|
186
194
|
"seg_start": {
|
187
195
|
"exon_ord": 0,
|
188
196
|
"offset": 0,
|
@@ -400,6 +408,7 @@ class ExonGenomicCoordsMapper:
|
|
400
408
|
gene=gene,
|
401
409
|
genomic_ac=genomic_ac,
|
402
410
|
tx_ac=transcript,
|
411
|
+
strand=strand,
|
403
412
|
seg_start=seg_start,
|
404
413
|
seg_end=seg_end,
|
405
414
|
)
|
@@ -490,6 +499,7 @@ class ExonGenomicCoordsMapper:
|
|
490
499
|
params["gene"] = start_tx_seg_data.gene
|
491
500
|
params["genomic_ac"] = start_tx_seg_data.genomic_ac
|
492
501
|
params["tx_ac"] = start_tx_seg_data.tx_ac
|
502
|
+
params["strand"] = start_tx_seg_data.strand
|
493
503
|
params["seg_start"] = start_tx_seg_data.seg
|
494
504
|
else:
|
495
505
|
start_tx_seg_data = None
|
@@ -524,6 +534,7 @@ class ExonGenomicCoordsMapper:
|
|
524
534
|
params["gene"] = end_tx_seg_data.gene
|
525
535
|
params["genomic_ac"] = end_tx_seg_data.genomic_ac
|
526
536
|
params["tx_ac"] = end_tx_seg_data.tx_ac
|
537
|
+
params["strand"] = end_tx_seg_data.strand
|
527
538
|
|
528
539
|
params["seg_end"] = end_tx_seg_data.seg
|
529
540
|
|
@@ -865,16 +876,21 @@ class ExonGenomicCoordsMapper:
|
|
865
876
|
if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
|
866
877
|
genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
|
867
878
|
|
868
|
-
# Validate that the breakpoint
|
879
|
+
# Validate that the breakpoint occurs within 150 bp of the first and last exon for the selected transcript.
|
880
|
+
# A breakpoint beyond this range is likely erroneous.
|
869
881
|
coordinate_check = await self._validate_genomic_breakpoint(
|
870
882
|
pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
|
871
883
|
)
|
872
884
|
if not coordinate_check:
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
885
|
+
msg = (
|
886
|
+
f"{genomic_pos} on {genomic_ac} occurs more than 150 bp outside the "
|
887
|
+
f"exon boundaries of the {transcript} transcript, indicating this may not "
|
888
|
+
f"be a chimeric transcript junction and is unlikely to represent a "
|
889
|
+
f"contiguous coding sequence. Confirm that the genomic position "
|
890
|
+
f"{genomic_pos} is being used to represent transcript junction and not "
|
891
|
+
f"DNA breakpoint."
|
877
892
|
)
|
893
|
+
_logger.warning(msg)
|
878
894
|
|
879
895
|
# Check if breakpoint occurs on an exon.
|
880
896
|
# If not, determine the adjacent exon given the selected transcript
|
@@ -913,6 +929,7 @@ class ExonGenomicCoordsMapper:
|
|
913
929
|
gene=gene,
|
914
930
|
genomic_ac=genomic_ac,
|
915
931
|
tx_ac=transcript,
|
932
|
+
strand=strand,
|
916
933
|
seg=TxSegment(
|
917
934
|
exon_ord=exon_num,
|
918
935
|
offset=offset,
|
@@ -955,8 +972,9 @@ class ExonGenomicCoordsMapper:
|
|
955
972
|
:param pos: Genomic position on ``genomic_ac``
|
956
973
|
:param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
|
957
974
|
:param transcript: A transcript accession
|
958
|
-
:return: ``True`` if the coordinate falls within the first and last exon
|
959
|
-
for the transcript, ``False`` if not
|
975
|
+
:return: ``True`` if the coordinate falls within 150bp of the first and last exon
|
976
|
+
for the transcript, ``False`` if not. Breakpoints past this threshold
|
977
|
+
are likely erroneous.
|
960
978
|
"""
|
961
979
|
query = f"""
|
962
980
|
WITH tx_boundaries AS (
|
@@ -968,7 +986,7 @@ class ExonGenomicCoordsMapper:
|
|
968
986
|
AND alt_ac = '{genomic_ac}'
|
969
987
|
)
|
970
988
|
SELECT * FROM tx_boundaries
|
971
|
-
WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
|
989
|
+
WHERE {pos} between (tx_boundaries.min_start - 150) and (tx_boundaries.max_end + 150)
|
972
990
|
""" # noqa: S608
|
973
991
|
results = await self.uta_db.execute_query(query)
|
974
992
|
return bool(results)
|
@@ -1022,11 +1040,8 @@ class ExonGenomicCoordsMapper:
|
|
1022
1040
|
:return ``True`` if alt_start_i should be used, ``False`` if alt_end_i should
|
1023
1041
|
be used
|
1024
1042
|
"""
|
1025
|
-
return (
|
1026
|
-
is_seg_start
|
1027
|
-
and strand == Strand.POSITIVE
|
1028
|
-
or not is_seg_start
|
1029
|
-
and strand == Strand.NEGATIVE
|
1043
|
+
return (is_seg_start and strand == Strand.POSITIVE) or (
|
1044
|
+
not is_seg_start and strand == Strand.NEGATIVE
|
1030
1045
|
)
|
1031
1046
|
|
1032
1047
|
@staticmethod
|
@@ -88,11 +88,11 @@ async def check_status(
|
|
88
88
|
try:
|
89
89
|
get_data_file(r)
|
90
90
|
except FileNotFoundError:
|
91
|
-
_logger.
|
91
|
+
_logger.exception(
|
92
92
|
"%s does not exist at configured location %s", name_lower, declared_path
|
93
93
|
)
|
94
94
|
except ValueError:
|
95
|
-
_logger.
|
95
|
+
_logger.exception(
|
96
96
|
"%s configured at %s is not a valid file.", name_lower, declared_path
|
97
97
|
)
|
98
98
|
except Exception as e:
|
@@ -107,8 +107,8 @@ async def check_status(
|
|
107
107
|
chain_file_37_to_38=chain_file_37_to_38,
|
108
108
|
chain_file_38_to_37=chain_file_38_to_37,
|
109
109
|
)
|
110
|
-
except (FileNotFoundError, ChainfileError)
|
111
|
-
_logger.
|
110
|
+
except (FileNotFoundError, ChainfileError):
|
111
|
+
_logger.exception("agct converter setup failed")
|
112
112
|
except Exception as e:
|
113
113
|
_logger.critical("Encountered unexpected error setting up agct: %s", e)
|
114
114
|
else:
|
@@ -116,10 +116,8 @@ async def check_status(
|
|
116
116
|
|
117
117
|
try:
|
118
118
|
await UtaDatabase.create(db_url)
|
119
|
-
except (OSError, InvalidCatalogNameError, UndefinedTableError)
|
120
|
-
_logger.error
|
121
|
-
"Encountered error instantiating UTA at URI %s: %s", UTA_DB_URL, e
|
122
|
-
)
|
119
|
+
except (OSError, InvalidCatalogNameError, UndefinedTableError):
|
120
|
+
_logger.exception("Encountered error instantiating UTA at URI %s", UTA_DB_URL)
|
123
121
|
except Exception as e:
|
124
122
|
_logger.critical(
|
125
123
|
"Encountered unexpected error instantiating UTA from URI %s: %s",
|
@@ -134,10 +132,10 @@ async def check_status(
|
|
134
132
|
sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
|
135
133
|
sra = SeqRepoAccess(sr)
|
136
134
|
sra.sr["NC_000001.11"][1000:1001]
|
137
|
-
except OSError
|
138
|
-
_logger.
|
135
|
+
except OSError:
|
136
|
+
_logger.exception("Encountered error while instantiating SeqRepo")
|
139
137
|
except KeyError:
|
140
|
-
_logger.
|
138
|
+
_logger.exception("SeqRepo data fetch test failed -- is it populated?")
|
141
139
|
except Exception as e:
|
142
140
|
_logger.critical("Encountered unexpected error setting up SeqRepo: %s", e)
|
143
141
|
else:
|
@@ -162,11 +162,11 @@ class UtaDatabase:
|
|
162
162
|
database=self.args.database,
|
163
163
|
)
|
164
164
|
except InterfaceError as e:
|
165
|
-
_logger.
|
166
|
-
"While creating connection pool, encountered exception
|
165
|
+
_logger.exception(
|
166
|
+
"While creating connection pool, encountered exception"
|
167
167
|
)
|
168
168
|
msg = "Could not create connection pool"
|
169
|
-
raise Exception(msg) from e
|
169
|
+
raise Exception(msg) from e # noqa: TRY002
|
170
170
|
|
171
171
|
@classmethod
|
172
172
|
async def create(
|
@@ -221,7 +221,7 @@ class UtaDatabase:
|
|
221
221
|
WHERE table_schema = '{self.schema}'
|
222
222
|
AND table_name = 'genomic'
|
223
223
|
);
|
224
|
-
"""
|
224
|
+
"""
|
225
225
|
genomic_table_exists = await self.execute_query(check_table_exists)
|
226
226
|
genomic_table_exists = genomic_table_exists[0].get("exists")
|
227
227
|
if genomic_table_exists is None:
|
@@ -250,7 +250,7 @@ class UtaDatabase:
|
|
250
250
|
LEFT JOIN {self.schema}.exon_aln ea ON
|
251
251
|
(((te.exon_id = ea.tx_exon_id) AND
|
252
252
|
(ae.exon_id = ea.alt_exon_id))));
|
253
|
-
"""
|
253
|
+
"""
|
254
254
|
await self.execute_query(create_genomic_table)
|
255
255
|
|
256
256
|
indexes = [
|
@@ -499,7 +499,7 @@ class UtaDatabase:
|
|
499
499
|
AND {start_pos} BETWEEN {pos_q}
|
500
500
|
AND {end_pos} BETWEEN {pos_q}
|
501
501
|
{order_by_cond}
|
502
|
-
"""
|
502
|
+
"""
|
503
503
|
result = await self.execute_query(query)
|
504
504
|
if not result:
|
505
505
|
_logger.warning("Unable to find transcript alignment for query: %s", query)
|
@@ -852,8 +852,8 @@ class UtaDatabase:
|
|
852
852
|
|
853
853
|
try:
|
854
854
|
assembly = Assembly(assembly)
|
855
|
-
except ValueError
|
856
|
-
_logger.
|
855
|
+
except ValueError:
|
856
|
+
_logger.exception("Unable to parse %s as an Assembly", assembly)
|
857
857
|
return None
|
858
858
|
|
859
859
|
return chromosome, assembly
|
@@ -924,11 +924,11 @@ class UtaDatabase:
|
|
924
924
|
|
925
925
|
try:
|
926
926
|
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
|
927
|
-
except ClientError
|
927
|
+
except ClientError:
|
928
928
|
# For a list of exceptions thrown, see
|
929
929
|
# https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
|
930
|
-
_logger.error
|
931
|
-
raise
|
930
|
+
_logger.exception("Encountered AWS client error fetching UTA DB secret")
|
931
|
+
raise
|
932
932
|
else:
|
933
933
|
return get_secret_value_response["SecretString"]
|
934
934
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.14.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -53,14 +53,14 @@ Requires-Dist: hgvs
|
|
53
53
|
Requires-Dist: biocommons.seqrepo
|
54
54
|
Requires-Dist: pydantic==2.*
|
55
55
|
Requires-Dist: ga4gh.vrs~=2.0.0a10
|
56
|
-
Requires-Dist: wags-tails~=0.
|
56
|
+
Requires-Dist: wags-tails~=0.3.2
|
57
57
|
Requires-Dist: bioutils
|
58
58
|
Provides-Extra: dev
|
59
|
-
Requires-Dist: pre-commit>=
|
59
|
+
Requires-Dist: pre-commit>=4.0.1; extra == "dev"
|
60
60
|
Requires-Dist: ipython; extra == "dev"
|
61
61
|
Requires-Dist: ipykernel; extra == "dev"
|
62
62
|
Requires-Dist: psycopg2-binary; extra == "dev"
|
63
|
-
Requires-Dist: ruff==0.
|
63
|
+
Requires-Dist: ruff==0.8.6; extra == "dev"
|
64
64
|
Provides-Extra: tests
|
65
65
|
Requires-Dist: pytest; extra == "tests"
|
66
66
|
Requires-Dist: pytest-cov; extra == "tests"
|
@@ -74,6 +74,7 @@ Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
|
|
74
74
|
Requires-Dist: sphinxext-opengraph==0.8.2; extra == "docs"
|
75
75
|
Requires-Dist: furo==2023.3.27; extra == "docs"
|
76
76
|
Requires-Dist: sphinx-github-changelog==1.2.1; extra == "docs"
|
77
|
+
Dynamic: license-file
|
77
78
|
|
78
79
|
<h1 align="center">
|
79
80
|
Cool-Seq-Tool
|
@@ -4,21 +4,21 @@ cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
|
|
4
4
|
cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
|
7
|
-
cool_seq_tool/mappers/__init__.py,sha256=
|
7
|
+
cool_seq_tool/mappers/__init__.py,sha256=4_YNwNyw_QrlhRNu1nly8Dezv81XjCIiNa7crVXEh38,305
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=uPBlI9K_IUV3xN9unnCwd6bPmqpfIWB5I-dksN_JZSw,44765
|
10
10
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
11
11
|
cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
|
12
12
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
13
13
|
cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
|
14
|
-
cool_seq_tool/resources/status.py,sha256=
|
14
|
+
cool_seq_tool/resources/status.py,sha256=9LYSO2mOzVmoSQwllzq1mGChjtDA6j3I0S372N89clA,5683
|
15
15
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
16
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
17
|
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
|
18
18
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
|
-
cool_seq_tool/sources/uta_database.py,sha256=
|
20
|
-
cool_seq_tool-0.
|
21
|
-
cool_seq_tool-0.
|
22
|
-
cool_seq_tool-0.
|
23
|
-
cool_seq_tool-0.
|
24
|
-
cool_seq_tool-0.
|
19
|
+
cool_seq_tool/sources/uta_database.py,sha256=UHFLeiuk8H29CF1tNjE8T22-QaPs_fDUaqQO6Hu18yg,36175
|
20
|
+
cool_seq_tool-0.14.0.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.14.0.dist-info/METADATA,sha256=1BpHtcCsaCUBvpmp1Qe1TVoZFHVvUir685EsH925PJ0,6579
|
22
|
+
cool_seq_tool-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
23
|
+
cool_seq_tool-0.14.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.14.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|