cool-seq-tool 0.14.3__py3-none-any.whl → 0.14.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
 
3
3
  import logging
4
4
 
5
+ from ga4gh.core.models import Extension
5
6
  from ga4gh.vrs.models import SequenceLocation, SequenceReference
6
7
  from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
7
8
 
@@ -66,6 +67,24 @@ class TxSegment(BaseModelForbidExtra):
66
67
  ..., description="The genomic position of a transcript segment."
67
68
  )
68
69
 
70
+ @model_validator(mode="before")
71
+ def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
72
+ """Ensure that only one of `start` or `end` is set in the
73
+ genomic_location field
74
+
75
+ :param values: The values in the TxSegment class
76
+ :raises ValueError: If `start` and `end` are both set in
77
+ `genomic_location`
78
+ :return: Values in model
79
+ """
80
+ loc = values.get("genomic_location")
81
+ start = getattr(loc, "start", None)
82
+ end = getattr(loc, "end", None)
83
+ if start and end:
84
+ err_msg = "Only one of `start` or `end` may be set as this describes the start or end of a transcript segment"
85
+ raise ValueError(err_msg)
86
+ return values
87
+
69
88
  model_config = ConfigDict(
70
89
  json_schema_extra={
71
90
  "example": {
@@ -78,6 +97,7 @@ class TxSegment(BaseModelForbidExtra):
78
97
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
79
98
  },
80
99
  "end": 154192135,
100
+ "extensions": [{"name": "is_exonic", "value": True}],
81
101
  },
82
102
  }
83
103
  }
@@ -135,6 +155,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
135
155
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
136
156
  },
137
157
  "end": 154192135,
158
+ "extensions": [{"name": "is_exonic", "value": True}],
138
159
  },
139
160
  },
140
161
  "errors": [],
@@ -201,6 +222,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
201
222
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
202
223
  },
203
224
  "end": 154192135,
225
+ "extensions": [{"name": "is_exonic", "value": True}],
204
226
  },
205
227
  },
206
228
  "seg_end": {
@@ -213,6 +235,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
213
235
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
214
236
  },
215
237
  "start": 154170399,
238
+ "extensions": [{"name": "is_exonic", "value": True}],
216
239
  },
217
240
  },
218
241
  }
@@ -705,7 +728,12 @@ class ExonGenomicCoordsMapper:
705
728
  ), None
706
729
 
707
730
  def _get_vrs_seq_loc(
708
- self, genomic_ac: str, genomic_pos: int, is_seg_start: bool, strand: Strand
731
+ self,
732
+ genomic_ac: str,
733
+ genomic_pos: int,
734
+ is_seg_start: bool,
735
+ strand: Strand,
736
+ is_exonic: bool = True,
709
737
  ) -> tuple[SequenceLocation | None, str | None]:
710
738
  """Create VRS Sequence Location for genomic position where transcript segment
711
739
  occurs
@@ -715,6 +743,8 @@ class ExonGenomicCoordsMapper:
715
743
  :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment
716
744
  starts. ``False`` if ``genomic_pos`` is where the transcript segment ends.
717
745
  :param strand: Strand
746
+ :param is_exonic: A boolean indicating if the genomic breakpoint occurs
747
+ on an exon. By default, this is set to ``True``.
718
748
  :return: Tuple containing VRS location (if successful) and error message (if
719
749
  unable to get GA4GH identifier for ``genomic_ac``).
720
750
  """
@@ -734,6 +764,7 @@ class ExonGenomicCoordsMapper:
734
764
  ),
735
765
  start=genomic_pos if use_start else None,
736
766
  end=genomic_pos if not use_start else None,
767
+ extensions=[Extension(name="is_exonic", value=is_exonic)],
737
768
  ), None
738
769
 
739
770
  async def _genomic_to_tx_segment(
@@ -895,6 +926,7 @@ class ExonGenomicCoordsMapper:
895
926
  # Check if breakpoint occurs on an exon.
896
927
  # If not, determine the adjacent exon given the selected transcript
897
928
  if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
929
+ is_exonic = False
898
930
  exon_num = self._get_adjacent_exon(
899
931
  tx_exons_genomic_coords=tx_exons,
900
932
  strand=strand,
@@ -902,6 +934,7 @@ class ExonGenomicCoordsMapper:
902
934
  end=genomic_pos if not is_seg_start else None,
903
935
  )
904
936
  else:
937
+ is_exonic = True
905
938
  exon_data = await self.uta_db.get_tx_exon_aln_v_data(
906
939
  transcript,
907
940
  genomic_pos,
@@ -920,7 +953,7 @@ class ExonGenomicCoordsMapper:
920
953
  )
921
954
 
922
955
  genomic_location, err_msg = self._get_vrs_seq_loc(
923
- genomic_ac, genomic_pos, is_seg_start, strand
956
+ genomic_ac, genomic_pos, is_seg_start, strand, is_exonic
924
957
  )
925
958
  if err_msg:
926
959
  return GenomicTxSeg(errors=[err_msg])
@@ -3,6 +3,7 @@
3
3
  import logging
4
4
  from collections import namedtuple
5
5
  from pathlib import Path
6
+ from urllib.parse import urlparse
6
7
 
7
8
  from agct._core import ChainfileError
8
9
  from asyncpg import InvalidCatalogNameError, UndefinedTableError
@@ -11,7 +12,7 @@ from biocommons.seqrepo import SeqRepo
11
12
  from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
12
13
  from cool_seq_tool.mappers.liftover import LiftOver
13
14
  from cool_seq_tool.resources.data_files import DataFile, get_data_file
14
- from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
15
+ from cool_seq_tool.sources.uta_database import UTA_DB_URL, ParseResult, UtaDatabase
15
16
 
16
17
  _logger = logging.getLogger(__name__)
17
18
 
@@ -119,14 +120,20 @@ async def check_status(
119
120
  else:
120
121
  status["liftover"] = True
121
122
 
123
+ parsed_result = ParseResult(urlparse(db_url))
124
+ sanitized_url = parsed_result.sanitized_url
122
125
  try:
123
126
  await UtaDatabase.create(db_url)
127
+ except ValueError:
128
+ _logger.exception("Database URL is not valid")
124
129
  except (OSError, InvalidCatalogNameError, UndefinedTableError):
125
- _logger.exception("Encountered error instantiating UTA at URI %s", UTA_DB_URL)
130
+ _logger.exception(
131
+ "Encountered error instantiating UTA at URI %s", sanitized_url
132
+ )
126
133
  except Exception as e:
127
134
  _logger.critical(
128
135
  "Encountered unexpected error instantiating UTA from URI %s: %s",
129
- UTA_DB_URL,
136
+ sanitized_url,
130
137
  e,
131
138
  )
132
139
  else:
@@ -5,7 +5,7 @@ import logging
5
5
  from os import environ
6
6
  from typing import Any, Literal, TypeVar
7
7
  from urllib.parse import ParseResult as UrlLibParseResult
8
- from urllib.parse import quote, unquote, urlparse
8
+ from urllib.parse import unquote, urlparse, urlunparse
9
9
 
10
10
  import asyncpg
11
11
  import boto3
@@ -101,8 +101,7 @@ class UtaDatabase:
101
101
  """
102
102
  self.schema = None
103
103
  self._connection_pool = None
104
- original_pwd = db_url.split("//")[-1].split("@")[0].split(":")[-1]
105
- self.db_url = db_url.replace(original_pwd, quote(original_pwd))
104
+ self.db_url = db_url
106
105
  self.args = self._get_conn_args()
107
106
 
108
107
  def _get_conn_args(self) -> DbConnectionArgs:
@@ -954,3 +953,28 @@ class ParseResult(UrlLibParseResult):
954
953
  """Create schema property."""
955
954
  path_elems = self.path.split("/")
956
955
  return path_elems[2] if len(path_elems) > 2 else None
956
+
957
+ @property
958
+ def sanitized_url(self) -> str:
959
+ """Sanitized DB URL with the password masked"""
960
+ netloc = ""
961
+ if self.username:
962
+ netloc += self.username
963
+ if self.password is not None and self.password != "":
964
+ netloc += ":***"
965
+ netloc += "@"
966
+ if self.hostname:
967
+ netloc += f"{self.hostname}"
968
+ if self.port:
969
+ netloc += f":{self.port}"
970
+
971
+ return urlunparse(
972
+ (
973
+ self.scheme,
974
+ netloc,
975
+ self.path,
976
+ self.params,
977
+ self.query,
978
+ self.fragment,
979
+ )
980
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.14.3
3
+ Version: 0.14.5
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -6,20 +6,20 @@ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2p
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
7
7
  cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
8
8
  cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=t36NhWo2Rl84dgZY6qO7XFmGpfisjAqC-1ZOTRZxWvg,44757
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=N6Wi7D8hs6gZi-BC4ICuWQEGeqUbBysqxG18EtIIgSk,46187
10
10
  cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
11
11
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
12
12
  cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
13
13
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
14
14
  cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QNCrmRRDXw,4205
15
- cool_seq_tool/resources/status.py,sha256=5UKx5FIQuyIY7FU4kSinDIM4MhLpr9_MiQDDBNt9kRo,5990
15
+ cool_seq_tool/resources/status.py,sha256=iP-4NiSmqV-D--gypZyrSqVbOWQvyBZICKQb-VinTik,6241
16
16
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
17
17
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
18
18
  cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
19
19
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
20
- cool_seq_tool/sources/uta_database.py,sha256=zzRzmYuybqzEg7zeuQjhK46SPK5GfbiWWNRGNJju8AI,36197
21
- cool_seq_tool-0.14.3.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
22
- cool_seq_tool-0.14.3.dist-info/METADATA,sha256=C-8jHyitpgW7cvaBDKtjfa8ClFsTil9X7yStIY73y3I,6535
23
- cool_seq_tool-0.14.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- cool_seq_tool-0.14.3.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
25
- cool_seq_tool-0.14.3.dist-info/RECORD,,
20
+ cool_seq_tool/sources/uta_database.py,sha256=38CQ0QHHh0kA87tdgsJHJiHdJHQc06ylBYfemGFUlZc,36759
21
+ cool_seq_tool-0.14.5.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
22
+ cool_seq_tool-0.14.5.dist-info/METADATA,sha256=tgWwlBS_2Z71zUJpmO7MjDE3cNFy6GYH7W8YyJrTo0I,6535
23
+ cool_seq_tool-0.14.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ cool_seq_tool-0.14.5.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
25
+ cool_seq_tool-0.14.5.dist-info/RECORD,,