cool-seq-tool 0.11.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/schemas.py +10 -2
- cool_seq_tool/sources/mane_transcript_mappings.py +46 -14
- cool_seq_tool/sources/uta_database.py +1 -1
- {cool_seq_tool-0.11.0.dist-info → cool_seq_tool-0.12.1.dist-info}/METADATA +1 -1
- {cool_seq_tool-0.11.0.dist-info → cool_seq_tool-0.12.1.dist-info}/RECORD +8 -8
- {cool_seq_tool-0.11.0.dist-info → cool_seq_tool-0.12.1.dist-info}/WHEEL +1 -1
- {cool_seq_tool-0.11.0.dist-info → cool_seq_tool-0.12.1.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.11.0.dist-info → cool_seq_tool-0.12.1.dist-info}/top_level.txt +0 -0
cool_seq_tool/schemas.py
CHANGED
@@ -43,11 +43,18 @@ class Assembly(str, Enum):
|
|
43
43
|
return [item.value for item in cls]
|
44
44
|
|
45
45
|
|
46
|
+
class ManeStatus(str, Enum):
|
47
|
+
"""Define constraints for mane status"""
|
48
|
+
|
49
|
+
SELECT = "mane_select"
|
50
|
+
PLUS_CLINICAL = "mane_plus_clinical"
|
51
|
+
|
52
|
+
|
46
53
|
class TranscriptPriority(str, Enum):
|
47
54
|
"""Create Enum for Transcript Priority labels"""
|
48
55
|
|
49
|
-
MANE_SELECT = "mane_select"
|
50
|
-
MANE_PLUS_CLINICAL = "mane_plus_clinical"
|
56
|
+
MANE_SELECT = ManeStatus.SELECT.value
|
57
|
+
MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
|
51
58
|
LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
|
52
59
|
GRCH38 = "grch38"
|
53
60
|
|
@@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"):
|
|
137
144
|
ncbi_gene_id: StrictInt
|
138
145
|
hgnc_id: StrictInt | None
|
139
146
|
symbol: StrictStr
|
147
|
+
status: list[ManeStatus]
|
140
148
|
|
141
149
|
|
142
150
|
class ServiceMeta(BaseModelForbidExtra):
|
cool_seq_tool/sources/mane_transcript_mappings.py
CHANGED
@@ -117,26 +117,58 @@ class ManeTranscriptMappings:
|
|
117
117
|
:param end: Genomic end position. Assumes residue coordinates.
|
118
118
|
:return: Unique MANE gene(s) found for a genomic location
|
119
119
|
"""
|
120
|
+
# Only interested in rows where genomic location lives
|
120
121
|
mane_rows = self.df.filter(
|
121
122
|
(start >= pl.col("chr_start"))
|
122
123
|
& (end <= pl.col("chr_end"))
|
123
124
|
& (pl.col("GRCh38_chr") == ac)
|
124
|
-
)
|
125
|
+
)
|
125
126
|
|
126
|
-
if
|
127
|
+
if mane_rows.is_empty():
|
127
128
|
return []
|
128
129
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
130
|
+
# Group rows by NCBI ID, transform values to representation we want, MANE status
|
131
|
+
# will be converted to list with DESC order
|
132
|
+
mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
|
133
|
+
[
|
134
|
+
pl.col("#NCBI_GeneID")
|
135
|
+
.first()
|
136
|
+
.str.split_exact(":", 1)
|
137
|
+
.struct.field("field_1")
|
138
|
+
.cast(pl.Int32)
|
139
|
+
.alias("ncbi_gene_id"),
|
140
|
+
pl.col("HGNC_ID")
|
141
|
+
.first()
|
142
|
+
.str.split_exact(":", 1)
|
143
|
+
.struct.field("field_1")
|
144
|
+
.cast(pl.Int32)
|
145
|
+
.alias("hgnc_id"),
|
146
|
+
pl.col("MANE_status")
|
147
|
+
.unique()
|
148
|
+
.str.to_lowercase()
|
149
|
+
.str.replace_all(" ", "_")
|
150
|
+
.alias("status")
|
151
|
+
.sort(descending=True),
|
152
|
+
pl.col("symbol").first(),
|
153
|
+
]
|
154
|
+
)
|
155
|
+
|
156
|
+
# Sort final rows based on MANE status
|
157
|
+
# First by length (which means gene has both select and plus clinical)
|
158
|
+
# Then by DESC order
|
159
|
+
# Then by NCBI ID ASC order
|
160
|
+
mane_rows = (
|
161
|
+
mane_rows.with_columns(
|
162
|
+
[
|
163
|
+
pl.col("status").list.len().alias("status_count"),
|
164
|
+
pl.col("status").list.join("_").alias("status_str"),
|
165
|
+
pl.col("ncbi_gene_id"),
|
166
|
+
]
|
167
|
+
)
|
168
|
+
.sort(
|
169
|
+
["status_count", "status_str", "ncbi_gene_id"],
|
170
|
+
descending=[True, True, False],
|
171
|
+
)
|
172
|
+
.drop(["status_count", "status_str", "#NCBI_GeneID"])
|
140
173
|
)
|
141
|
-
mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
|
142
174
|
return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
|
cool_seq_tool/sources/uta_database.py
CHANGED
@@ -27,7 +27,7 @@ from cool_seq_tool.schemas import (
|
|
27
27
|
UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")
|
28
28
|
|
29
29
|
UTA_DB_URL = environ.get(
|
30
|
-
"UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20210129b"
|
30
|
+
"UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220"
|
31
31
|
)
|
32
32
|
|
33
33
|
_logger = logging.getLogger(__name__)
|
{cool_seq_tool-0.11.0.dist-info → cool_seq_tool-0.12.1.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
|
2
2
|
cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
|
3
|
-
cool_seq_tool/schemas.py,sha256=
|
3
|
+
cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
|
4
4
|
cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
|
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
|
|
14
14
|
cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
|
15
15
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
16
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
|
-
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=
|
17
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
|
18
18
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
|
-
cool_seq_tool/sources/uta_database.py,sha256=
|
20
|
-
cool_seq_tool-0.11.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
-
cool_seq_tool-0.
|
22
|
-
cool_seq_tool-0.
|
23
|
-
cool_seq_tool-0.11.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
-
cool_seq_tool-0.11.0.dist-info/RECORD,,
|
19
|
+
cool_seq_tool/sources/uta_database.py,sha256=V8cuog35-5BtOODu_vreDlBaP_PgBv8r7pYeTXmGsio,36111
|
20
|
+
cool_seq_tool-0.12.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.12.1.dist-info/METADATA,sha256=EJRFVXjGIBdR-cSbFmDgJ6BnN81meArVPKkv4uUt9YU,6557
|
22
|
+
cool_seq_tool-0.12.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
23
|
+
cool_seq_tool-0.12.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.12.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|