cool-seq-tool 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -865,14 +865,14 @@ class ExonGenomicCoordsMapper:
865
865
  if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
866
866
  genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
867
867
 
868
- # Validate that the breakpoint occurs on a transcript given a gene
869
- coordinate_check = await self._validate_gene_coordinates(
870
- pos=genomic_pos, genomic_ac=genomic_ac, gene=gene
868
+ # Validate that the breakpoint occurs between the first and last exon for the selected transcript
869
+ coordinate_check = await self._validate_genomic_breakpoint(
870
+ pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
871
871
  )
872
872
  if not coordinate_check:
873
873
  return GenomicTxSeg(
874
874
  errors=[
875
- f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}"
875
+ f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
876
876
  ]
877
877
  )
878
878
 
@@ -943,38 +943,32 @@ class ExonGenomicCoordsMapper:
943
943
  )
944
944
  return liftover_data[1] if liftover_data else None
945
945
 
946
- async def _validate_gene_coordinates(
946
+ async def _validate_genomic_breakpoint(
947
947
  self,
948
948
  pos: int,
949
949
  genomic_ac: str,
950
- gene: str,
950
+ tx_ac: str,
951
951
  ) -> bool:
952
952
  """Validate that a genomic coordinate falls within the first and last exon
953
- given a gene and accession
953
+ for a transcript on a given accession
954
954
 
955
955
  :param pos: Genomic position on ``genomic_ac``
956
956
  :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
957
- :param gene: A valid, case-sensitive HGNC gene symbol
957
+ :param tx_ac: A transcript accession
958
958
  :return: ``True`` if the coordinate falls within the first and last exon
959
- for the gene, ``False`` if not
959
+ for the transcript, ``False`` if not
960
960
  """
961
961
  query = f"""
962
962
  WITH tx_boundaries AS (
963
- SELECT
964
- tx_ac,
965
- hgnc,
966
- MIN(alt_start_i) as min_start,
967
- MAX(alt_end_i) as max_end
963
+ SELECT
964
+ MIN(alt_start_i) AS min_start,
965
+ MAX(alt_end_i) AS max_end
968
966
  FROM {self.uta_db.schema}.tx_exon_aln_v
969
- WHERE hgnc = '{gene}'
967
+ WHERE tx_ac = '{tx_ac}'
970
968
  AND alt_ac = '{genomic_ac}'
971
- GROUP BY tx_ac, hgnc
972
969
  )
973
- SELECT DISTINCT hgnc
974
- FROM tx_boundaries
970
+ SELECT * FROM tx_boundaries
975
971
  WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
976
- ORDER BY hgnc
977
- LIMIT 1;
978
972
  """ # noqa: S608
979
973
  results = await self.uta_db.execute_query(query)
980
974
  return bool(results)
cool_seq_tool/schemas.py CHANGED
@@ -43,11 +43,18 @@ class Assembly(str, Enum):
43
43
  return [item.value for item in cls]
44
44
 
45
45
 
46
+ class ManeStatus(str, Enum):
47
+ """Define constraints for MANE status"""
48
+
49
+ SELECT = "mane_select"
50
+ PLUS_CLINICAL = "mane_plus_clinical"
51
+
52
+
46
53
  class TranscriptPriority(str, Enum):
47
54
  """Create Enum for Transcript Priority labels"""
48
55
 
49
- MANE_SELECT = "mane_select"
50
- MANE_PLUS_CLINICAL = "mane_plus_clinical"
56
+ MANE_SELECT = ManeStatus.SELECT.value
57
+ MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
51
58
  LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
52
59
  GRCH38 = "grch38"
53
60
 
@@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"):
137
144
  ncbi_gene_id: StrictInt
138
145
  hgnc_id: StrictInt | None
139
146
  symbol: StrictStr
147
+ status: list[ManeStatus]
140
148
 
141
149
 
142
150
  class ServiceMeta(BaseModelForbidExtra):
@@ -117,26 +117,58 @@ class ManeTranscriptMappings:
117
117
  :param end: Genomic end position. Assumes residue coordinates.
118
118
  :return: Unique MANE gene(s) found for a genomic location
119
119
  """
120
+ # Keep only the rows whose chromosome range contains the genomic location
120
121
  mane_rows = self.df.filter(
121
122
  (start >= pl.col("chr_start"))
122
123
  & (end <= pl.col("chr_end"))
123
124
  & (pl.col("GRCh38_chr") == ac)
124
- ).unique(subset=["#NCBI_GeneID"])
125
+ )
125
126
 
126
- if len(mane_rows) == 0:
127
+ if mane_rows.is_empty():
127
128
  return []
128
129
 
129
- mane_rows = mane_rows.with_columns(
130
- pl.col("#NCBI_GeneID")
131
- .str.split_exact(":", 1)
132
- .struct.field("field_1")
133
- .cast(pl.Int32)
134
- .alias("ncbi_gene_id"),
135
- pl.col("HGNC_ID")
136
- .str.split_exact(":", 1)
137
- .struct.field("field_1")
138
- .cast(pl.Int32)
139
- .alias("hgnc_id"),
130
+ # Group rows by NCBI ID, transform values to representation we want, MANE status
131
+ # will be converted to list with DESC order
132
+ mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
133
+ [
134
+ pl.col("#NCBI_GeneID")
135
+ .first()
136
+ .str.split_exact(":", 1)
137
+ .struct.field("field_1")
138
+ .cast(pl.Int32)
139
+ .alias("ncbi_gene_id"),
140
+ pl.col("HGNC_ID")
141
+ .first()
142
+ .str.split_exact(":", 1)
143
+ .struct.field("field_1")
144
+ .cast(pl.Int32)
145
+ .alias("hgnc_id"),
146
+ pl.col("MANE_status")
147
+ .unique()
148
+ .str.to_lowercase()
149
+ .str.replace_all(" ", "_")
150
+ .alias("status")
151
+ .sort(descending=True),
152
+ pl.col("symbol").first(),
153
+ ]
154
+ )
155
+
156
+ # Sort final rows based on MANE status
157
+ # First by length (which means gene has both select and plus clinical)
158
+ # Then by DESC order
159
+ # Then by NCBI ID ASC order
160
+ mane_rows = (
161
+ mane_rows.with_columns(
162
+ [
163
+ pl.col("status").list.len().alias("status_count"),
164
+ pl.col("status").list.join("_").alias("status_str"),
165
+ pl.col("ncbi_gene_id"),
166
+ ]
167
+ )
168
+ .sort(
169
+ ["status_count", "status_str", "ncbi_gene_id"],
170
+ descending=[True, True, False],
171
+ )
172
+ .drop(["status_count", "status_str", "#NCBI_GeneID"])
140
173
  )
141
- mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
142
174
  return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cool_seq_tool
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -1,12 +1,12 @@
1
1
  cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
2
2
  cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
3
- cool_seq_tool/schemas.py,sha256=HInmKpsujybVR6pRmkKNOIzPCBqk9Ni5q1ZKNFtip50,3945
3
+ cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
4
4
  cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
7
7
  cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
8
8
  cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=uOJGy8HFoe1mjoxnaKP9x60hcgQm03vINJpUzJfxE0A,43948
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=ORYjBVaX1HO6ln0gRJyRKxUCjZrBDi4JfYQEYebxIAc,43824
10
10
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
11
11
  cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
12
12
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
14
14
  cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
15
15
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
16
16
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
- cool_seq_tool/sources/mane_transcript_mappings.py,sha256=Q6J57O2lLWXlgKT0zq3BIwkwFawySnORHOX-UxzfyDE,5399
17
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
18
18
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
19
  cool_seq_tool/sources/uta_database.py,sha256=s7BkFplD_b2AmvXq8vZSCiBuZLy8RlxAqNyf-6QtR8w,36112
20
- cool_seq_tool-0.10.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
- cool_seq_tool-0.10.0.dist-info/METADATA,sha256=EOIMJIeXl9Om4LrUrr2ccPED68ou52_4lqTCrkWHzHQ,6557
22
- cool_seq_tool-0.10.0.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
23
- cool_seq_tool-0.10.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
- cool_seq_tool-0.10.0.dist-info/RECORD,,
20
+ cool_seq_tool-0.12.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.12.0.dist-info/METADATA,sha256=Nt7O4bD59cQqje3eH_sKPkP8uvPz9ApxjKMvS6so0HE,6557
22
+ cool_seq_tool-0.12.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
23
+ cool_seq_tool-0.12.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.12.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.7.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5