cool-seq-tool 0.11.0__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cool_seq_tool/schemas.py CHANGED
@@ -43,11 +43,18 @@ class Assembly(str, Enum):
43
43
  return [item.value for item in cls]
44
44
 
45
45
 
46
+ class ManeStatus(str, Enum):
47
+ """Define constraints for mane status"""
48
+
49
+ SELECT = "mane_select"
50
+ PLUS_CLINICAL = "mane_plus_clinical"
51
+
52
+
46
53
  class TranscriptPriority(str, Enum):
47
54
  """Create Enum for Transcript Priority labels"""
48
55
 
49
- MANE_SELECT = "mane_select"
50
- MANE_PLUS_CLINICAL = "mane_plus_clinical"
56
+ MANE_SELECT = ManeStatus.SELECT.value
57
+ MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
51
58
  LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
52
59
  GRCH38 = "grch38"
53
60
 
@@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"):
137
144
  ncbi_gene_id: StrictInt
138
145
  hgnc_id: StrictInt | None
139
146
  symbol: StrictStr
147
+ status: list[ManeStatus]
140
148
 
141
149
 
142
150
  class ServiceMeta(BaseModelForbidExtra):
@@ -117,26 +117,58 @@ class ManeTranscriptMappings:
117
117
  :param end: Genomic end position. Assumes residue coordinates.
118
118
  :return: Unique MANE gene(s) found for a genomic location
119
119
  """
120
+ # Only interested in rows where genomic location lives
120
121
  mane_rows = self.df.filter(
121
122
  (start >= pl.col("chr_start"))
122
123
  & (end <= pl.col("chr_end"))
123
124
  & (pl.col("GRCh38_chr") == ac)
124
- ).unique(subset=["#NCBI_GeneID"])
125
+ )
125
126
 
126
- if len(mane_rows) == 0:
127
+ if mane_rows.is_empty():
127
128
  return []
128
129
 
129
- mane_rows = mane_rows.with_columns(
130
- pl.col("#NCBI_GeneID")
131
- .str.split_exact(":", 1)
132
- .struct.field("field_1")
133
- .cast(pl.Int32)
134
- .alias("ncbi_gene_id"),
135
- pl.col("HGNC_ID")
136
- .str.split_exact(":", 1)
137
- .struct.field("field_1")
138
- .cast(pl.Int32)
139
- .alias("hgnc_id"),
130
+ # Group rows by NCBI ID, transform values to representation we want, MANE status
131
+ # will be converted to list with DESC order
132
+ mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
133
+ [
134
+ pl.col("#NCBI_GeneID")
135
+ .first()
136
+ .str.split_exact(":", 1)
137
+ .struct.field("field_1")
138
+ .cast(pl.Int32)
139
+ .alias("ncbi_gene_id"),
140
+ pl.col("HGNC_ID")
141
+ .first()
142
+ .str.split_exact(":", 1)
143
+ .struct.field("field_1")
144
+ .cast(pl.Int32)
145
+ .alias("hgnc_id"),
146
+ pl.col("MANE_status")
147
+ .unique()
148
+ .str.to_lowercase()
149
+ .str.replace_all(" ", "_")
150
+ .alias("status")
151
+ .sort(descending=True),
152
+ pl.col("symbol").first(),
153
+ ]
154
+ )
155
+
156
+ # Sort final rows based on MANE status
157
+ # First by length (which means gene has both select and plus clinical)
158
+ # Then by DESC order
159
+ # Then by NCBI ID ASC order
160
+ mane_rows = (
161
+ mane_rows.with_columns(
162
+ [
163
+ pl.col("status").list.len().alias("status_count"),
164
+ pl.col("status").list.join("_").alias("status_str"),
165
+ pl.col("ncbi_gene_id"),
166
+ ]
167
+ )
168
+ .sort(
169
+ ["status_count", "status_str", "ncbi_gene_id"],
170
+ descending=[True, True, False],
171
+ )
172
+ .drop(["status_count", "status_str", "#NCBI_GeneID"])
140
173
  )
141
- mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
142
174
  return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
@@ -27,7 +27,7 @@ from cool_seq_tool.schemas import (
27
27
  UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")
28
28
 
29
29
  UTA_DB_URL = environ.get(
30
- "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20210129b"
30
+ "UTA_DB_URL", "postgresql://uta_admin:uta@localhost:5432/uta/uta_20241220"
31
31
  )
32
32
 
33
33
  _logger = logging.getLogger(__name__)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: cool_seq_tool
3
- Version: 0.11.0
3
+ Version: 0.12.1
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
2
2
  cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
3
- cool_seq_tool/schemas.py,sha256=HInmKpsujybVR6pRmkKNOIzPCBqk9Ni5q1ZKNFtip50,3945
3
+ cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
4
4
  cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
14
14
  cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
15
15
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
16
16
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
- cool_seq_tool/sources/mane_transcript_mappings.py,sha256=Q6J57O2lLWXlgKT0zq3BIwkwFawySnORHOX-UxzfyDE,5399
17
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
18
18
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
- cool_seq_tool/sources/uta_database.py,sha256=s7BkFplD_b2AmvXq8vZSCiBuZLy8RlxAqNyf-6QtR8w,36112
20
- cool_seq_tool-0.11.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
- cool_seq_tool-0.11.0.dist-info/METADATA,sha256=VcP6BvVyQ1YVB2u2XsZbEVd9DYYr-ZKcHadIt3ACsBY,6557
22
- cool_seq_tool-0.11.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
23
- cool_seq_tool-0.11.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
- cool_seq_tool-0.11.0.dist-info/RECORD,,
19
+ cool_seq_tool/sources/uta_database.py,sha256=V8cuog35-5BtOODu_vreDlBaP_PgBv8r7pYeTXmGsio,36111
20
+ cool_seq_tool-0.12.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.12.1.dist-info/METADATA,sha256=EJRFVXjGIBdR-cSbFmDgJ6BnN81meArVPKkv4uUt9YU,6557
22
+ cool_seq_tool-0.12.1.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
23
+ cool_seq_tool-0.12.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.12.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5