cool-seq-tool 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/exon_genomic_coords.py +32 -15
- cool_seq_tool/sources/mane_transcript_mappings.py +3 -1
- {cool_seq_tool-0.7.1.dist-info → cool_seq_tool-0.9.0.dist-info}/METADATA +31 -31
- {cool_seq_tool-0.7.1.dist-info → cool_seq_tool-0.9.0.dist-info}/RECORD +7 -7
- {cool_seq_tool-0.7.1.dist-info → cool_seq_tool-0.9.0.dist-info}/WHEEL +1 -1
- {cool_seq_tool-0.7.1.dist-info → cool_seq_tool-0.9.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.7.1.dist-info → cool_seq_tool-0.9.0.dist-info}/top_level.txt +0 -0
@@ -87,7 +87,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
87
87
|
"""Model for representing a boundary for a transcript segment."""
|
88
88
|
|
89
89
|
seg: TxSegment | None = Field(None, description="Transcript segment.")
|
90
|
-
gene: StrictStr | None = Field(
|
90
|
+
gene: StrictStr | None = Field(
|
91
|
+
None, description="Valid, case-sensitive HGNC gene symbol."
|
92
|
+
)
|
91
93
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
92
94
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
93
95
|
errors: list[StrictStr] = Field([], description="Error messages.")
|
@@ -139,7 +141,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
139
141
|
class GenomicTxSegService(BaseModelForbidExtra):
|
140
142
|
"""Service model for genomic and transcript data."""
|
141
143
|
|
142
|
-
gene: StrictStr | None = Field(
|
144
|
+
gene: StrictStr | None = Field(
|
145
|
+
None, description="Valid, case-sensitive HGNC gene symbol."
|
146
|
+
)
|
143
147
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
144
148
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
145
149
|
seg_start: TxSegment | None = Field(None, description="Start transcript segment.")
|
@@ -292,7 +296,7 @@ class ExonGenomicCoordsMapper:
|
|
292
296
|
('NC_000001.11', 154192135, 154170399)
|
293
297
|
|
294
298
|
:param transcript: RefSeq transcript accession
|
295
|
-
:param gene: HGNC gene symbol
|
299
|
+
:param gene: Valid, case-sensitive HGNC gene symbol
|
296
300
|
:param exon_start: Starting transcript exon number (1-based). If not provided,
|
297
301
|
must provide ``exon_end``
|
298
302
|
:param exon_start_offset: Starting exon offset
|
@@ -335,9 +339,6 @@ class ExonGenomicCoordsMapper:
|
|
335
339
|
if errors:
|
336
340
|
return _return_service_errors(errors)
|
337
341
|
|
338
|
-
if gene:
|
339
|
-
gene = gene.upper()
|
340
|
-
|
341
342
|
# Get aligned genomic data (hgnc gene, alt_ac, alt_start_i, alt_end_i, strand)
|
342
343
|
# for exon(s)
|
343
344
|
(
|
@@ -455,7 +456,7 @@ class ExonGenomicCoordsMapper:
|
|
455
456
|
following the breakpoint for the 3' end. For the negative strand, adjacent
|
456
457
|
is defined as the exon following the breakpoint for the 5' end and the exon
|
457
458
|
preceding the breakpoint for the 3' end.
|
458
|
-
:param gene:
|
459
|
+
:param gene: A valid, case-sensitive HGNC symbol. Must be given if no ``transcript``
|
459
460
|
value is provided.
|
460
461
|
:param coordinate_type: Coordinate type for ``seg_start_genomic`` and
|
461
462
|
``seg_end_genomic``
|
@@ -473,9 +474,6 @@ class ExonGenomicCoordsMapper:
|
|
473
474
|
if errors:
|
474
475
|
return _return_service_errors(errors)
|
475
476
|
|
476
|
-
if gene is not None:
|
477
|
-
gene = gene.upper()
|
478
|
-
|
479
477
|
params = {}
|
480
478
|
|
481
479
|
if seg_start_genomic:
|
@@ -630,7 +628,7 @@ class ExonGenomicCoordsMapper:
|
|
630
628
|
must provide ``tx_exon_end``
|
631
629
|
:param tx_exon_end: Transcript's exon end coordinates. If not provided, must
|
632
630
|
provide ``tx_exon_start``
|
633
|
-
:param gene: HGNC gene symbol
|
631
|
+
:param gene: A valid, case-sensitive HGNC gene symbol
|
634
632
|
:return: Tuple containing aligned genomic data for start and end exon and
|
635
633
|
warnings if found
|
636
634
|
"""
|
@@ -755,7 +753,7 @@ class ExonGenomicCoordsMapper:
|
|
755
753
|
:param transcript: The transcript to use. If this is not given, we will try the
|
756
754
|
following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
|
757
755
|
Compatible Transcript
|
758
|
-
:param gene: HGNC gene symbol
|
756
|
+
:param gene: Valid, case-sensitive HGNC gene symbol
|
759
757
|
:param get_nearest_transcript_junction: If ``True``, this will return the
|
760
758
|
adjacent exon if the position specified by ``seg_start_genomic`` or
|
761
759
|
``seg_end_genomic`` does not occur on an exon. For the positive strand, adjacent
|
@@ -1062,7 +1060,7 @@ class ExonGenomicCoordsMapper:
|
|
1062
1060
|
:param genomic_ac: Genomic RefSeq accession
|
1063
1061
|
:param genomic_pos: Genomic position where the transcript segment occurs
|
1064
1062
|
:param is_seg_start: Whether or not ``genomic_pos`` represents the start position.
|
1065
|
-
:param gene: HGNC gene symbol
|
1063
|
+
:param gene: Valid, case-sensitive HGNC gene symbol
|
1066
1064
|
:param tx_ac: Transcript RefSeq accession. If not provided, will use MANE
|
1067
1065
|
transcript
|
1068
1066
|
:return: Transcript segment data and associated genomic metadata
|
@@ -1171,14 +1169,32 @@ class ExonGenomicCoordsMapper:
|
|
1171
1169
|
:param end: Genomic coordinate of breakpoint
|
1172
1170
|
:return: Exon number corresponding to adjacent exon. Will be 0-based
|
1173
1171
|
"""
|
1174
|
-
|
1172
|
+
# If a transcript has only one exon, return 0
|
1173
|
+
if len(tx_exons_genomic_coords) == 1:
|
1174
|
+
return 0
|
1175
|
+
|
1176
|
+
# Check if a breakpoint occurs before/after the transcript boundaries
|
1177
|
+
bp = start if start else end
|
1178
|
+
exon_list_len = len(tx_exons_genomic_coords) - 1
|
1179
|
+
|
1180
|
+
if strand == Strand.POSITIVE:
|
1181
|
+
if bp < tx_exons_genomic_coords[0].alt_start_i:
|
1182
|
+
return 0
|
1183
|
+
if bp > tx_exons_genomic_coords[exon_list_len].alt_end_i:
|
1184
|
+
return exon_list_len
|
1185
|
+
if strand == Strand.NEGATIVE:
|
1186
|
+
if bp > tx_exons_genomic_coords[0].alt_end_i:
|
1187
|
+
return 0
|
1188
|
+
if bp < tx_exons_genomic_coords[exon_list_len].alt_start_i:
|
1189
|
+
return exon_list_len
|
1190
|
+
|
1191
|
+
for i in range(exon_list_len):
|
1175
1192
|
exon = tx_exons_genomic_coords[i]
|
1176
1193
|
if start == exon.alt_start_i:
|
1177
1194
|
break
|
1178
1195
|
if end == exon.alt_end_i:
|
1179
1196
|
break
|
1180
1197
|
next_exon = tx_exons_genomic_coords[i + 1]
|
1181
|
-
bp = start if start else end
|
1182
1198
|
if strand == Strand.POSITIVE:
|
1183
1199
|
lte_exon = exon
|
1184
1200
|
gte_exon = next_exon
|
@@ -1187,6 +1203,7 @@ class ExonGenomicCoordsMapper:
|
|
1187
1203
|
gte_exon = exon
|
1188
1204
|
if bp >= lte_exon.alt_end_i and bp <= gte_exon.alt_start_i:
|
1189
1205
|
break
|
1206
|
+
|
1190
1207
|
# Return current exon if end position is provided, next exon if start position
|
1191
1208
|
# is provided.
|
1192
1209
|
return exon.ord if end else exon.ord + 1
|
@@ -61,7 +61,9 @@ class ManeTranscriptMappings:
|
|
61
61
|
location information). The list is sorted so that a MANE Select entry comes
|
62
62
|
first, followed by a MANE Plus Clinical entry, if available.
|
63
63
|
"""
|
64
|
-
data = self.df.filter(
|
64
|
+
data = self.df.filter(
|
65
|
+
pl.col("symbol").str.to_uppercase() == gene_symbol.upper()
|
66
|
+
)
|
65
67
|
|
66
68
|
if len(data) == 0:
|
67
69
|
_logger.warning(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.7.1
|
3
|
+
Version: 0.9.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -47,39 +47,39 @@ License-File: LICENSE
|
|
47
47
|
Requires-Dist: asyncpg
|
48
48
|
Requires-Dist: aiofiles
|
49
49
|
Requires-Dist: boto3
|
50
|
-
Requires-Dist: agct
|
51
|
-
Requires-Dist: polars
|
50
|
+
Requires-Dist: agct>=0.1.0-dev1
|
51
|
+
Requires-Dist: polars~=1.0
|
52
52
|
Requires-Dist: hgvs
|
53
53
|
Requires-Dist: biocommons.seqrepo
|
54
|
-
Requires-Dist: pydantic
|
55
|
-
Requires-Dist: ga4gh.vrs
|
56
|
-
Requires-Dist: wags-tails
|
54
|
+
Requires-Dist: pydantic==2.*
|
55
|
+
Requires-Dist: ga4gh.vrs~=2.0.0a10
|
56
|
+
Requires-Dist: wags-tails~=0.2.2
|
57
57
|
Requires-Dist: bioutils
|
58
58
|
Provides-Extra: dev
|
59
|
-
Requires-Dist: pre-commit
|
60
|
-
Requires-Dist: ipython
|
61
|
-
Requires-Dist: ipykernel
|
62
|
-
Requires-Dist: psycopg2-binary
|
63
|
-
Requires-Dist: ruff
|
59
|
+
Requires-Dist: pre-commit>=3.7.1; extra == "dev"
|
60
|
+
Requires-Dist: ipython; extra == "dev"
|
61
|
+
Requires-Dist: ipykernel; extra == "dev"
|
62
|
+
Requires-Dist: psycopg2-binary; extra == "dev"
|
63
|
+
Requires-Dist: ruff==0.5.0; extra == "dev"
|
64
|
+
Provides-Extra: tests
|
65
|
+
Requires-Dist: pytest; extra == "tests"
|
66
|
+
Requires-Dist: pytest-cov; extra == "tests"
|
67
|
+
Requires-Dist: pytest-asyncio==0.18.3; extra == "tests"
|
68
|
+
Requires-Dist: mock; extra == "tests"
|
64
69
|
Provides-Extra: docs
|
65
|
-
Requires-Dist: sphinx
|
66
|
-
Requires-Dist: sphinx-autodoc-typehints
|
67
|
-
Requires-Dist: sphinx-autobuild
|
68
|
-
Requires-Dist: sphinx-copybutton
|
69
|
-
Requires-Dist: sphinxext-opengraph
|
70
|
-
Requires-Dist: furo
|
71
|
-
Requires-Dist: sphinx-github-changelog
|
72
|
-
Provides-Extra: test
|
73
|
-
Requires-Dist: pytest ; extra == 'test'
|
74
|
-
Requires-Dist: pytest-cov ; extra == 'test'
|
75
|
-
Requires-Dist: pytest-asyncio ==0.18.3 ; extra == 'test'
|
76
|
-
Requires-Dist: mock ; extra == 'test'
|
70
|
+
Requires-Dist: sphinx==6.1.3; extra == "docs"
|
71
|
+
Requires-Dist: sphinx-autodoc-typehints==1.22.0; extra == "docs"
|
72
|
+
Requires-Dist: sphinx-autobuild==2021.3.14; extra == "docs"
|
73
|
+
Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
|
74
|
+
Requires-Dist: sphinxext-opengraph==0.8.2; extra == "docs"
|
75
|
+
Requires-Dist: furo==2023.3.27; extra == "docs"
|
76
|
+
Requires-Dist: sphinx-github-changelog==1.2.1; extra == "docs"
|
77
77
|
|
78
78
|
<h1 align="center">
|
79
|
-
|
79
|
+
Cool-Seq-Tool
|
80
80
|
</h1>
|
81
81
|
|
82
|
-
[](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
|
82
|
+
[](https://pypi.python.org/pypi/cool-seq-tool) [](https://doi.org/10.5281/zenodo.14007783) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
|
83
83
|
|
84
84
|
---
|
85
85
|
|
@@ -90,18 +90,18 @@ CoolSeqTool
|
|
90
90
|
## Overview
|
91
91
|
|
92
92
|
<!-- description -->
|
93
|
-
The **
|
93
|
+
The Common Operations On Lots-Of Sequences Tool, **Cool-Seq-Tool**, provides:
|
94
94
|
|
95
|
-
|
96
|
-
|
97
|
-
|
95
|
+
- A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and transcript alignment data from the [Universal Transcript Archive](https://github.com/biocommons/uta)
|
96
|
+
- Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
|
97
|
+
- Mapping tools, including a transcript selection algorithm for selecting a representative transcript defined [here](https://coolseqtool.readthedocs.io/stable/transcript_selection.html), that combine the above to support translation between reference sequences, annotation layers, and transcripts
|
98
98
|
<!-- /description -->
|
99
99
|
|
100
100
|
---
|
101
101
|
|
102
102
|
## Install
|
103
103
|
|
104
|
-
|
104
|
+
Cool-Seq-Tool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
|
105
105
|
|
106
106
|
```shell
|
107
107
|
python3 -m pip install cool-seq-tool
|
@@ -113,7 +113,7 @@ See the [installation instructions](https://coolseqtool.readthedocs.io/stable/in
|
|
113
113
|
|
114
114
|
## Usage
|
115
115
|
|
116
|
-
All
|
116
|
+
All Cool-Seq-Tool resources can be initialized by way of a top-level class instance:
|
117
117
|
|
118
118
|
```pycon
|
119
119
|
>>> from cool_seq_tool import CoolSeqTool
|
@@ -6,7 +6,7 @@ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2p
|
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=XYHWYHL9PcBIKHB_EsN1YKwmhP-KLrGyZv8yH_7huuo,49533
|
10
10
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
11
11
|
cool_seq_tool/mappers/mane_transcript.py,sha256=nirxlf3EGVInFYG4fsAqiEmDdTc_h1XuPyX2ul-a7Rk,54368
|
12
12
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
|
|
14
14
|
cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
|
15
15
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
16
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
|
-
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=
|
17
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=Q6J57O2lLWXlgKT0zq3BIwkwFawySnORHOX-UxzfyDE,5399
|
18
18
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
19
|
cool_seq_tool/sources/uta_database.py,sha256=gc5wsKOIhvzhwFmPmqOY0hhaVfRkRSzYNa9tpBt81_U,35017
|
20
|
-
cool_seq_tool-0.
|
21
|
-
cool_seq_tool-0.
|
22
|
-
cool_seq_tool-0.
|
23
|
-
cool_seq_tool-0.
|
24
|
-
cool_seq_tool-0.
|
20
|
+
cool_seq_tool-0.9.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.9.0.dist-info/METADATA,sha256=GgGZqNTW98YoV49Reizkz2UnPq1MqJOR4jjDHlcWTDQ,6556
|
22
|
+
cool_seq_tool-0.9.0.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
23
|
+
cool_seq_tool-0.9.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.9.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|