cool-seq-tool 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/exon_genomic_coords.py +14 -20
- cool_seq_tool/schemas.py +10 -2
- cool_seq_tool/sources/mane_transcript_mappings.py +46 -14
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/METADATA +2 -2
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/RECORD +8 -8
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/WHEEL +1 -1
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/top_level.txt +0 -0
@@ -865,14 +865,14 @@ class ExonGenomicCoordsMapper:
|
|
865
865
|
if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
|
866
866
|
genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
|
867
867
|
|
868
|
-
# Validate that the breakpoint
|
869
|
-
coordinate_check = await self.
|
870
|
-
pos=genomic_pos, genomic_ac=genomic_ac,
|
868
|
+
# Validate that the breakpoint falls between the first and last exon for the selected transcript
|
869
|
+
coordinate_check = await self._validate_genomic_breakpoint(
|
870
|
+
pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
|
871
871
|
)
|
872
872
|
if not coordinate_check:
|
873
873
|
return GenomicTxSeg(
|
874
874
|
errors=[
|
875
|
-
f"{genomic_pos} on {genomic_ac} does not occur within the exons for {
|
875
|
+
f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
|
876
876
|
]
|
877
877
|
)
|
878
878
|
|
@@ -943,38 +943,32 @@ class ExonGenomicCoordsMapper:
|
|
943
943
|
)
|
944
944
|
return liftover_data[1] if liftover_data else None
|
945
945
|
|
946
|
-
async def
|
946
|
+
async def _validate_genomic_breakpoint(
|
947
947
|
self,
|
948
948
|
pos: int,
|
949
949
|
genomic_ac: str,
|
950
|
-
|
950
|
+
tx_ac: str,
|
951
951
|
) -> bool:
|
952
952
|
"""Validate that a genomic coordinate falls within the first and last exon
|
953
|
-
|
953
|
+
for a transcript on a given accession
|
954
954
|
|
955
955
|
:param pos: Genomic position on ``genomic_ac``
|
956
956
|
:param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
|
957
|
-
:param
|
957
|
+
:param tx_ac: A transcript accession
|
958
958
|
:return: ``True`` if the coordinate falls within the first and last exon
|
959
|
-
for the
|
959
|
+
for the transcript, ``False`` if not
|
960
960
|
"""
|
961
961
|
query = f"""
|
962
962
|
WITH tx_boundaries AS (
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
MIN(alt_start_i) as min_start,
|
967
|
-
MAX(alt_end_i) as max_end
|
963
|
+
SELECT
|
964
|
+
MIN(alt_start_i) AS min_start,
|
965
|
+
MAX(alt_end_i) AS max_end
|
968
966
|
FROM {self.uta_db.schema}.tx_exon_aln_v
|
969
|
-
WHERE
|
967
|
+
WHERE tx_ac = '{tx_ac}'
|
970
968
|
AND alt_ac = '{genomic_ac}'
|
971
|
-
GROUP BY tx_ac, hgnc
|
972
969
|
)
|
973
|
-
SELECT
|
974
|
-
FROM tx_boundaries
|
970
|
+
SELECT * FROM tx_boundaries
|
975
971
|
WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
|
976
|
-
ORDER BY hgnc
|
977
|
-
LIMIT 1;
|
978
972
|
""" # noqa: S608
|
979
973
|
results = await self.uta_db.execute_query(query)
|
980
974
|
return bool(results)
|
cool_seq_tool/schemas.py
CHANGED
@@ -43,11 +43,18 @@ class Assembly(str, Enum):
|
|
43
43
|
return [item.value for item in cls]
|
44
44
|
|
45
45
|
|
46
|
+
class ManeStatus(str, Enum):
|
47
|
+
"""Define constraints for MANE status"""
|
48
|
+
|
49
|
+
SELECT = "mane_select"
|
50
|
+
PLUS_CLINICAL = "mane_plus_clinical"
|
51
|
+
|
52
|
+
|
46
53
|
class TranscriptPriority(str, Enum):
|
47
54
|
"""Create Enum for Transcript Priority labels"""
|
48
55
|
|
49
|
-
MANE_SELECT =
|
50
|
-
MANE_PLUS_CLINICAL =
|
56
|
+
MANE_SELECT = ManeStatus.SELECT.value
|
57
|
+
MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
|
51
58
|
LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
|
52
59
|
GRCH38 = "grch38"
|
53
60
|
|
@@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"):
|
|
137
144
|
ncbi_gene_id: StrictInt
|
138
145
|
hgnc_id: StrictInt | None
|
139
146
|
symbol: StrictStr
|
147
|
+
status: list[ManeStatus]
|
140
148
|
|
141
149
|
|
142
150
|
class ServiceMeta(BaseModelForbidExtra):
|
@@ -117,26 +117,58 @@ class ManeTranscriptMappings:
|
|
117
117
|
:param end: Genomic end position. Assumes residue coordinates.
|
118
118
|
:return: Unique MANE gene(s) found for a genomic location
|
119
119
|
"""
|
120
|
+
# Keep only the rows on this accession whose chr_start/chr_end span contains the genomic location
|
120
121
|
mane_rows = self.df.filter(
|
121
122
|
(start >= pl.col("chr_start"))
|
122
123
|
& (end <= pl.col("chr_end"))
|
123
124
|
& (pl.col("GRCh38_chr") == ac)
|
124
|
-
)
|
125
|
+
)
|
125
126
|
|
126
|
-
if
|
127
|
+
if mane_rows.is_empty():
|
127
128
|
return []
|
128
129
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
130
|
+
# Group rows by NCBI ID, transform values to representation we want, MANE status
|
131
|
+
# will be converted to list with DESC order
|
132
|
+
mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
|
133
|
+
[
|
134
|
+
pl.col("#NCBI_GeneID")
|
135
|
+
.first()
|
136
|
+
.str.split_exact(":", 1)
|
137
|
+
.struct.field("field_1")
|
138
|
+
.cast(pl.Int32)
|
139
|
+
.alias("ncbi_gene_id"),
|
140
|
+
pl.col("HGNC_ID")
|
141
|
+
.first()
|
142
|
+
.str.split_exact(":", 1)
|
143
|
+
.struct.field("field_1")
|
144
|
+
.cast(pl.Int32)
|
145
|
+
.alias("hgnc_id"),
|
146
|
+
pl.col("MANE_status")
|
147
|
+
.unique()
|
148
|
+
.str.to_lowercase()
|
149
|
+
.str.replace_all(" ", "_")
|
150
|
+
.alias("status")
|
151
|
+
.sort(descending=True),
|
152
|
+
pl.col("symbol").first(),
|
153
|
+
]
|
154
|
+
)
|
155
|
+
|
156
|
+
# Sort final rows based on MANE status
|
157
|
+
# First by length (which means gene has both select and plus clinical)
|
158
|
+
# Then by DESC order
|
159
|
+
# Then by NCBI ID ASC order
|
160
|
+
mane_rows = (
|
161
|
+
mane_rows.with_columns(
|
162
|
+
[
|
163
|
+
pl.col("status").list.len().alias("status_count"),
|
164
|
+
pl.col("status").list.join("_").alias("status_str"),
|
165
|
+
pl.col("ncbi_gene_id"),
|
166
|
+
]
|
167
|
+
)
|
168
|
+
.sort(
|
169
|
+
["status_count", "status_str", "ncbi_gene_id"],
|
170
|
+
descending=[True, True, False],
|
171
|
+
)
|
172
|
+
.drop(["status_count", "status_str", "#NCBI_GeneID"])
|
140
173
|
)
|
141
|
-
mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
|
142
174
|
return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
|
@@ -1,12 +1,12 @@
|
|
1
1
|
cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
|
2
2
|
cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
|
3
|
-
cool_seq_tool/schemas.py,sha256=
|
3
|
+
cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
|
4
4
|
cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=ORYjBVaX1HO6ln0gRJyRKxUCjZrBDi4JfYQEYebxIAc,43824
|
10
10
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
11
11
|
cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
|
12
12
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
|
|
14
14
|
cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
|
15
15
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
16
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
|
-
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=
|
17
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
|
18
18
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
19
|
cool_seq_tool/sources/uta_database.py,sha256=s7BkFplD_b2AmvXq8vZSCiBuZLy8RlxAqNyf-6QtR8w,36112
|
20
|
-
cool_seq_tool-0.
|
21
|
-
cool_seq_tool-0.
|
22
|
-
cool_seq_tool-0.
|
23
|
-
cool_seq_tool-0.
|
24
|
-
cool_seq_tool-0.
|
20
|
+
cool_seq_tool-0.12.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.12.0.dist-info/METADATA,sha256=Nt7O4bD59cQqje3eH_sKPkP8uvPz9ApxjKMvS6so0HE,6557
|
22
|
+
cool_seq_tool-0.12.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
23
|
+
cool_seq_tool-0.12.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.12.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|