cool-seq-tool 0.14.4__tar.gz → 0.15.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. cool_seq_tool-0.15.0/.github/CODEOWNERS +1 -0
  2. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/PKG-INFO +2 -2
  3. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/pyproject.toml +1 -1
  4. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/app.py +1 -0
  5. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/exon_genomic_coords.py +42 -15
  6. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/status.py +10 -3
  7. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/mane_transcript_mappings.py +17 -1
  8. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/uta_database.py +27 -3
  9. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/PKG-INFO +2 -2
  10. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/SOURCES.txt +1 -0
  11. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/requires.txt +1 -1
  12. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_exon_genomic_coords.py +58 -14
  13. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/sources/test_mane_transcript_mappings.py +10 -1
  14. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/sources/test_uta_database.py +98 -0
  15. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.coveragerc +0 -0
  16. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/ISSUE_TEMPLATE/bug-report.yaml +0 -0
  17. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/ISSUE_TEMPLATE/feature-request.yaml +0 -0
  18. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/checks.yaml +0 -0
  19. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/pr-priority-label.yaml +0 -0
  20. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/release.yml +0 -0
  21. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/stale.yaml +0 -0
  22. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.gitignore +0 -0
  23. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.pre-commit-config.yaml +0 -0
  24. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.readthedocs.yaml +0 -0
  25. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/CITATION.cff +0 -0
  26. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/LICENSE +0 -0
  27. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/README.md +0 -0
  28. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/Makefile +0 -0
  29. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/make.bat +0 -0
  30. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/_static/img/biomart.png +0 -0
  31. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/_templates/module_summary.rst +0 -0
  32. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/changelog.rst +0 -0
  33. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/conf.py +0 -0
  34. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/contributing.rst +0 -0
  35. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/index.rst +0 -0
  36. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/install.rst +0 -0
  37. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/license.rst +0 -0
  38. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/reference/index.rst +0 -0
  39. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/transcript_selection.rst +0 -0
  40. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/usage.rst +0 -0
  41. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/setup.cfg +0 -0
  42. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/__init__.py +0 -0
  43. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/handlers/__init__.py +0 -0
  44. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/handlers/seqrepo_access.py +0 -0
  45. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/__init__.py +0 -0
  46. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/alignment.py +0 -0
  47. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/feature_overlap.py +0 -0
  48. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/liftover.py +0 -0
  49. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/mane_transcript.py +0 -0
  50. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/__init__.py +0 -0
  51. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/data_files.py +0 -0
  52. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/transcript_mapping.tsv +0 -0
  53. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/schemas.py +0 -0
  54. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/__init__.py +0 -0
  55. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/transcript_mappings.py +0 -0
  56. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/utils.py +0 -0
  57. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/dependency_links.txt +0 -0
  58. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/top_level.txt +0 -0
  59. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/conftest.py +0 -0
  60. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/handlers/test_feature_overlap.py +0 -0
  61. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/handlers/test_seqrepo_access.py +0 -0
  62. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_alignment.py +0 -0
  63. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_liftover.py +0 -0
  64. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_mane_transcript.py +0 -0
  65. {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/test_utils.py +0 -0
@@ -0,0 +1 @@
1
+ * @GenomicMedLab/cool-seq-tool-maintainers
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.14.4
3
+ Version: 0.15.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -50,7 +50,7 @@ Requires-Dist: agct>=0.1.0-dev1
50
50
  Requires-Dist: polars~=1.0
51
51
  Requires-Dist: biocommons.seqrepo
52
52
  Requires-Dist: pydantic<3.0,>=2.0
53
- Requires-Dist: ga4gh.vrs<3.0,>=2.1.3
53
+ Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
54
54
  Requires-Dist: wags-tails~=0.4.0
55
55
  Requires-Dist: bioutils
56
56
  Provides-Extra: dev
@@ -30,7 +30,7 @@ dependencies = [
30
30
  "polars ~= 1.0",
31
31
  "biocommons.seqrepo",
32
32
  "pydantic >=2.0,<3.0",
33
- "ga4gh.vrs >=2.1.3,<3.0",
33
+ "ga4gh.vrs >=2.1.4,<3.0",
34
34
  "wags-tails ~= 0.4.0",
35
35
  "bioutils",
36
36
  ]
@@ -107,6 +107,7 @@ class CoolSeqTool:
107
107
  self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
108
108
  self.seqrepo_access,
109
109
  self.uta_db,
110
+ self.mane_transcript,
110
111
  self.mane_transcript_mappings,
111
112
  self.liftover,
112
113
  )
@@ -2,17 +2,21 @@
2
2
 
3
3
  import logging
4
4
 
5
+ from ga4gh.core.models import Extension
5
6
  from ga4gh.vrs.models import SequenceLocation, SequenceReference
6
7
  from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
7
8
 
8
9
  from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
9
10
  from cool_seq_tool.mappers.liftover import LiftOver
11
+ from cool_seq_tool.mappers.mane_transcript import ManeTranscript
10
12
  from cool_seq_tool.schemas import (
13
+ AnnotationLayer,
11
14
  Assembly,
12
15
  BaseModelForbidExtra,
13
16
  CoordinateType,
14
17
  ServiceMeta,
15
18
  Strand,
19
+ TranscriptPriority,
16
20
  )
17
21
  from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
18
22
  from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
@@ -65,9 +69,6 @@ class TxSegment(BaseModelForbidExtra):
65
69
  genomic_location: SequenceLocation = Field(
66
70
  ..., description="The genomic position of a transcript segment."
67
71
  )
68
- is_exonic: bool = Field(
69
- default=True, description="If the position occurs on an exon"
70
- )
71
72
 
72
73
  @model_validator(mode="before")
73
74
  def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
@@ -99,8 +100,8 @@ class TxSegment(BaseModelForbidExtra):
99
100
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
100
101
  },
101
102
  "end": 154192135,
103
+ "extensions": [{"name": "is_exonic", "value": True}],
102
104
  },
103
- "is_exonic": True,
104
105
  }
105
106
  }
106
107
  )
@@ -115,6 +116,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
115
116
  )
116
117
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
117
118
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
119
+ tx_status: TranscriptPriority | None = Field(
120
+ None, description="Transcript priority for RefSeq transcript accession"
121
+ )
118
122
  strand: Strand | None = Field(
119
123
  None, description="The strand that the transcript accession exists on."
120
124
  )
@@ -146,6 +150,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
146
150
  "gene": "TPM3",
147
151
  "genomic_ac": "NC_000001.11",
148
152
  "tx_ac": "NM_152263.3",
153
+ "tx_status": "longest_compatible_remaining",
149
154
  "strand": -1,
150
155
  "seg": {
151
156
  "exon_ord": 0,
@@ -157,8 +162,8 @@ class GenomicTxSeg(BaseModelForbidExtra):
157
162
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
158
163
  },
159
164
  "end": 154192135,
165
+ "extensions": [{"name": "is_exonic", "value": True}],
160
166
  },
161
- "is_exonic": True,
162
167
  },
163
168
  "errors": [],
164
169
  }
@@ -174,6 +179,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
174
179
  )
175
180
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
176
181
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
182
+ tx_status: TranscriptPriority | None = Field(
183
+ None, description="Transcript priority for RefSeq transcript accession"
184
+ )
177
185
  strand: Strand | None = Field(
178
186
  None, description="The strand that the transcript exists on."
179
187
  )
@@ -213,6 +221,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
213
221
  "gene": "TPM3",
214
222
  "genomic_ac": "NC_000001.11",
215
223
  "tx_ac": "NM_152263.3",
224
+ "tx_status": "longest_compatible_remaining",
216
225
  "strand": -1,
217
226
  "seg_start": {
218
227
  "exon_ord": 0,
@@ -224,8 +233,8 @@ class GenomicTxSegService(BaseModelForbidExtra):
224
233
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
225
234
  },
226
235
  "end": 154192135,
236
+ "extensions": [{"name": "is_exonic", "value": True}],
227
237
  },
228
- "is_exonic": True,
229
238
  },
230
239
  "seg_end": {
231
240
  "exon_ord": 7,
@@ -237,8 +246,8 @@ class GenomicTxSegService(BaseModelForbidExtra):
237
246
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
238
247
  },
239
248
  "start": 154170399,
249
+ "extensions": [{"name": "is_exonic", "value": True}],
240
250
  },
241
- "is_exonic": True,
242
251
  },
243
252
  }
244
253
  }
@@ -266,6 +275,7 @@ class ExonGenomicCoordsMapper:
266
275
  self,
267
276
  seqrepo_access: SeqRepoAccess,
268
277
  uta_db: UtaDatabase,
278
+ mane_transcript: ManeTranscript,
269
279
  mane_transcript_mappings: ManeTranscriptMappings,
270
280
  liftover: LiftOver,
271
281
  ) -> None:
@@ -290,11 +300,13 @@ class ExonGenomicCoordsMapper:
290
300
 
291
301
  :param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
292
302
  :param uta_db: UtaDatabase instance to give access to query UTA database
303
+ :param mane_transcript: ManeTranscript instance to give access to ManeTranscript class
293
304
  :param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
294
305
  :param liftover: Instance to provide mapping between human genome assemblies
295
306
  """
296
307
  self.seqrepo_access = seqrepo_access
297
308
  self.uta_db = uta_db
309
+ self.mane_transcript = mane_transcript
298
310
  self.mane_transcript_mappings = mane_transcript_mappings
299
311
  self.liftover = liftover
300
312
 
@@ -433,6 +445,7 @@ class ExonGenomicCoordsMapper:
433
445
  gene=gene,
434
446
  genomic_ac=genomic_ac,
435
447
  tx_ac=transcript,
448
+ tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
436
449
  strand=strand,
437
450
  seg_start=seg_start,
438
451
  seg_end=seg_end,
@@ -524,6 +537,7 @@ class ExonGenomicCoordsMapper:
524
537
  params["gene"] = start_tx_seg_data.gene
525
538
  params["genomic_ac"] = start_tx_seg_data.genomic_ac
526
539
  params["tx_ac"] = start_tx_seg_data.tx_ac
540
+ params["tx_status"] = start_tx_seg_data.tx_status
527
541
  params["strand"] = start_tx_seg_data.strand
528
542
  params["seg_start"] = start_tx_seg_data.seg
529
543
  else:
@@ -559,6 +573,7 @@ class ExonGenomicCoordsMapper:
559
573
  params["gene"] = end_tx_seg_data.gene
560
574
  params["genomic_ac"] = end_tx_seg_data.genomic_ac
561
575
  params["tx_ac"] = end_tx_seg_data.tx_ac
576
+ params["tx_status"] = end_tx_seg_data.tx_status
562
577
  params["strand"] = end_tx_seg_data.strand
563
578
 
564
579
  params["seg_end"] = end_tx_seg_data.seg
@@ -730,7 +745,12 @@ class ExonGenomicCoordsMapper:
730
745
  ), None
731
746
 
732
747
  def _get_vrs_seq_loc(
733
- self, genomic_ac: str, genomic_pos: int, is_seg_start: bool, strand: Strand
748
+ self,
749
+ genomic_ac: str,
750
+ genomic_pos: int,
751
+ is_seg_start: bool,
752
+ strand: Strand,
753
+ is_exonic: bool = True,
734
754
  ) -> tuple[SequenceLocation | None, str | None]:
735
755
  """Create VRS Sequence Location for genomic position where transcript segment
736
756
  occurs
@@ -740,6 +760,8 @@ class ExonGenomicCoordsMapper:
740
760
  :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment
741
761
  starts. ``False`` if ``genomic_pos`` is where the transcript segment ends.
742
762
  :param strand: Strand
763
+ :param is_exonic: A boolean indicating if the genomic breakpoint occurs
764
+ on an exon. By default, this is set to ``True``.
743
765
  :return: Tuple containing VRS location (if successful) and error message (if
744
766
  unable to get GA4GH identifier for ``genomic_ac``).
745
767
  """
@@ -759,6 +781,7 @@ class ExonGenomicCoordsMapper:
759
781
  ),
760
782
  start=genomic_pos if use_start else None,
761
783
  end=genomic_pos if not use_start else None,
784
+ extensions=[Extension(name="is_exonic", value=is_exonic)],
762
785
  ), None
763
786
 
764
787
  async def _genomic_to_tx_segment(
@@ -852,14 +875,18 @@ class ExonGenomicCoordsMapper:
852
875
  if mane_transcripts:
853
876
  transcript = mane_transcripts[0]["RefSeq_nuc"]
854
877
  else:
855
- # Attempt to find a coding transcript if a MANE transcript
878
+ # Attempt to find longest compatible transcript if a MANE transcript
856
879
  # cannot be found
857
- results = await self.uta_db.get_transcripts(
858
- gene=gene, alt_ac=genomic_ac
880
+ results = await self.mane_transcript.get_longest_compatible_transcript(
881
+ start_pos=genomic_pos,
882
+ end_pos=genomic_pos,
883
+ gene=gene,
884
+ alt_ac=genomic_ac,
885
+ start_annotation_layer=AnnotationLayer.GENOMIC,
859
886
  )
860
887
 
861
- if not results.is_empty():
862
- transcript = results[0]["tx_ac"][0]
888
+ if results:
889
+ transcript = results.refseq
863
890
  else:
864
891
  # Run if gene is for a noncoding transcript
865
892
  query = f"""
@@ -947,7 +974,7 @@ class ExonGenomicCoordsMapper:
947
974
  )
948
975
 
949
976
  genomic_location, err_msg = self._get_vrs_seq_loc(
950
- genomic_ac, genomic_pos, is_seg_start, strand
977
+ genomic_ac, genomic_pos, is_seg_start, strand, is_exonic
951
978
  )
952
979
  if err_msg:
953
980
  return GenomicTxSeg(errors=[err_msg])
@@ -956,12 +983,12 @@ class ExonGenomicCoordsMapper:
956
983
  gene=gene,
957
984
  genomic_ac=genomic_ac,
958
985
  tx_ac=transcript,
986
+ tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
959
987
  strand=strand,
960
988
  seg=TxSegment(
961
989
  exon_ord=exon_num,
962
990
  offset=offset,
963
991
  genomic_location=genomic_location,
964
- is_exonic=is_exonic,
965
992
  ),
966
993
  )
967
994
 
@@ -3,6 +3,7 @@
3
3
  import logging
4
4
  from collections import namedtuple
5
5
  from pathlib import Path
6
+ from urllib.parse import urlparse
6
7
 
7
8
  from agct._core import ChainfileError
8
9
  from asyncpg import InvalidCatalogNameError, UndefinedTableError
@@ -11,7 +12,7 @@ from biocommons.seqrepo import SeqRepo
11
12
  from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
12
13
  from cool_seq_tool.mappers.liftover import LiftOver
13
14
  from cool_seq_tool.resources.data_files import DataFile, get_data_file
14
- from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
15
+ from cool_seq_tool.sources.uta_database import UTA_DB_URL, ParseResult, UtaDatabase
15
16
 
16
17
  _logger = logging.getLogger(__name__)
17
18
 
@@ -119,14 +120,20 @@ async def check_status(
119
120
  else:
120
121
  status["liftover"] = True
121
122
 
123
+ parsed_result = ParseResult(urlparse(db_url))
124
+ sanitized_url = parsed_result.sanitized_url
122
125
  try:
123
126
  await UtaDatabase.create(db_url)
127
+ except ValueError:
128
+ _logger.exception("Database URL is not valid")
124
129
  except (OSError, InvalidCatalogNameError, UndefinedTableError):
125
- _logger.exception("Encountered error instantiating UTA at URI %s", UTA_DB_URL)
130
+ _logger.exception(
131
+ "Encountered error instantiating UTA at URI %s", sanitized_url
132
+ )
126
133
  except Exception as e:
127
134
  _logger.critical(
128
135
  "Encountered unexpected error instantiating UTA from URI %s: %s",
129
- UTA_DB_URL,
136
+ sanitized_url,
130
137
  e,
131
138
  )
132
139
  else:
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
  import polars as pl
9
9
 
10
10
  from cool_seq_tool.resources.data_files import DataFile, get_data_file
11
- from cool_seq_tool.schemas import ManeGeneData
11
+ from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
12
12
 
13
13
  _logger = logging.getLogger(__name__)
14
14
 
@@ -85,6 +85,22 @@ class ManeTranscriptMappings:
85
85
  return []
86
86
  return mane_rows.to_dicts()
87
87
 
88
+ def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
89
+ """Get MANE status for a transcript
90
+
91
+ :param tx_ac: A RefSeq transcript accession
92
+ :return: A TranscriptPriority object
93
+ """
94
+ mane_info = self.get_mane_from_transcripts([tx_ac])
95
+ if not mane_info:
96
+ return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
97
+ mane_info = mane_info[0]["MANE_status"]
98
+ return (
99
+ TranscriptPriority.MANE_SELECT
100
+ if mane_info == "MANE Select"
101
+ else TranscriptPriority.MANE_PLUS_CLINICAL
102
+ )
103
+
88
104
  def get_mane_data_from_chr_pos(
89
105
  self, alt_ac: str, start: int, end: int
90
106
  ) -> list[dict]:
@@ -5,7 +5,7 @@ import logging
5
5
  from os import environ
6
6
  from typing import Any, Literal, TypeVar
7
7
  from urllib.parse import ParseResult as UrlLibParseResult
8
- from urllib.parse import quote, unquote, urlparse
8
+ from urllib.parse import unquote, urlparse, urlunparse
9
9
 
10
10
  import asyncpg
11
11
  import boto3
@@ -101,8 +101,7 @@ class UtaDatabase:
101
101
  """
102
102
  self.schema = None
103
103
  self._connection_pool = None
104
- original_pwd = db_url.split("//")[-1].split("@")[0].split(":")[-1]
105
- self.db_url = db_url.replace(original_pwd, quote(original_pwd))
104
+ self.db_url = db_url
106
105
  self.args = self._get_conn_args()
107
106
 
108
107
  def _get_conn_args(self) -> DbConnectionArgs:
@@ -954,3 +953,28 @@ class ParseResult(UrlLibParseResult):
954
953
  """Create schema property."""
955
954
  path_elems = self.path.split("/")
956
955
  return path_elems[2] if len(path_elems) > 2 else None
956
+
957
+ @property
958
+ def sanitized_url(self) -> str:
959
+ """Sanitized DB URL with the password masked"""
960
+ netloc = ""
961
+ if self.username:
962
+ netloc += self.username
963
+ if self.password is not None and self.password != "":
964
+ netloc += ":***"
965
+ netloc += "@"
966
+ if self.hostname:
967
+ netloc += f"{self.hostname}"
968
+ if self.port:
969
+ netloc += f":{self.port}"
970
+
971
+ return urlunparse(
972
+ (
973
+ self.scheme,
974
+ netloc,
975
+ self.path,
976
+ self.params,
977
+ self.query,
978
+ self.fragment,
979
+ )
980
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.14.4
3
+ Version: 0.15.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -50,7 +50,7 @@ Requires-Dist: agct>=0.1.0-dev1
50
50
  Requires-Dist: polars~=1.0
51
51
  Requires-Dist: biocommons.seqrepo
52
52
  Requires-Dist: pydantic<3.0,>=2.0
53
- Requires-Dist: ga4gh.vrs<3.0,>=2.1.3
53
+ Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
54
54
  Requires-Dist: wags-tails~=0.4.0
55
55
  Requires-Dist: bioutils
56
56
  Provides-Extra: dev
@@ -6,6 +6,7 @@ CITATION.cff
6
6
  LICENSE
7
7
  README.md
8
8
  pyproject.toml
9
+ .github/CODEOWNERS
9
10
  .github/ISSUE_TEMPLATE/bug-report.yaml
10
11
  .github/ISSUE_TEMPLATE/feature-request.yaml
11
12
  .github/workflows/checks.yaml
@@ -4,7 +4,7 @@ agct>=0.1.0-dev1
4
4
  polars~=1.0
5
5
  biocommons.seqrepo
6
6
  pydantic<3.0,>=2.0
7
- ga4gh.vrs<3.0,>=2.1.3
7
+ ga4gh.vrs<3.0,>=2.1.4
8
8
  wags-tails~=0.4.0
9
9
  bioutils
10
10
 
@@ -172,6 +172,7 @@ def tpm3_exon1():
172
172
  "gene": "TPM3",
173
173
  "genomic_ac": "NC_000001.11",
174
174
  "tx_ac": "NM_152263.3",
175
+ "tx_status": "longest_compatible_remaining",
175
176
  "strand": -1,
176
177
  "seg": {
177
178
  "exon_ord": 0,
@@ -183,8 +184,8 @@ def tpm3_exon1():
183
184
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
184
185
  },
185
186
  "end": 154192135,
187
+ "extensions": [{"name": "is_exonic", "value": True}],
186
188
  },
187
- "is_exonic": True,
188
189
  },
189
190
  }
190
191
  return GenomicTxSeg(**params)
@@ -197,6 +198,7 @@ def tpm3_exon8():
197
198
  "gene": "TPM3",
198
199
  "genomic_ac": "NC_000001.11",
199
200
  "tx_ac": "NM_152263.3",
201
+ "tx_status": "longest_compatible_remaining",
200
202
  "strand": -1,
201
203
  "seg": {
202
204
  "exon_ord": 7,
@@ -208,8 +210,8 @@ def tpm3_exon8():
208
210
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
209
211
  },
210
212
  "start": 154170399,
213
+ "extensions": [{"name": "is_exonic", "value": True}],
211
214
  },
212
- "is_exonic": True,
213
215
  },
214
216
  }
215
217
  return GenomicTxSeg(**params)
@@ -222,6 +224,7 @@ def tpm3_exon1_g(tpm3_exon1):
222
224
  "gene": tpm3_exon1.gene,
223
225
  "genomic_ac": tpm3_exon1.genomic_ac,
224
226
  "tx_ac": tpm3_exon1.tx_ac,
227
+ "tx_status": tpm3_exon1.tx_status,
225
228
  "strand": tpm3_exon1.strand,
226
229
  "seg_start": tpm3_exon1.seg,
227
230
  }
@@ -235,6 +238,7 @@ def tpm3_exon8_g(tpm3_exon8):
235
238
  "gene": tpm3_exon8.gene,
236
239
  "genomic_ac": tpm3_exon8.genomic_ac,
237
240
  "tx_ac": tpm3_exon8.tx_ac,
241
+ "tx_status": tpm3_exon8.tx_status,
238
242
  "strand": tpm3_exon8.strand,
239
243
  "seg_end": tpm3_exon8.seg,
240
244
  }
@@ -248,6 +252,7 @@ def tpm3_exon1_exon8(tpm3_exon1, tpm3_exon8):
248
252
  "gene": tpm3_exon8.gene,
249
253
  "genomic_ac": tpm3_exon8.genomic_ac,
250
254
  "tx_ac": tpm3_exon8.tx_ac,
255
+ "tx_status": tpm3_exon8.tx_status,
251
256
  "strand": tpm3_exon8.strand,
252
257
  "seg_start": tpm3_exon1.seg,
253
258
  "seg_end": tpm3_exon8.seg,
@@ -269,6 +274,7 @@ def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8):
269
274
  "gene": "TPM3",
270
275
  "genomic_ac": "NC_000001.11",
271
276
  "tx_ac": "NM_152263.3",
277
+ "tx_status": "longest_compatible_remaining",
272
278
  "strand": -1,
273
279
  "seg_start": tpm3_exon1_cpy.seg,
274
280
  "seg_end": tpm3_exon8_cpy.seg,
@@ -283,6 +289,7 @@ def mane_braf():
283
289
  "gene": "BRAF",
284
290
  "genomic_ac": "NC_000007.14",
285
291
  "tx_ac": "NM_004333.6",
292
+ "tx_status": "mane_select",
286
293
  "strand": -1,
287
294
  "seg_start": {
288
295
  "exon_ord": 5,
@@ -294,8 +301,8 @@ def mane_braf():
294
301
  "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
295
302
  },
296
303
  "end": 140801559,
304
+ "extensions": [{"name": "is_exonic", "value": True}],
297
305
  },
298
- "is_exonic": True,
299
306
  },
300
307
  "seg_end": {
301
308
  "exon_ord": 14,
@@ -307,8 +314,8 @@ def mane_braf():
307
314
  "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
308
315
  },
309
316
  "start": 140753336,
317
+ "extensions": [{"name": "is_exonic", "value": True}],
310
318
  },
311
- "is_exonic": True,
312
319
  },
313
320
  }
314
321
  return GenomicTxSegService(**params)
@@ -321,6 +328,7 @@ def wee1_exon2_exon11():
321
328
  "gene": "WEE1",
322
329
  "genomic_ac": "NC_000011.10",
323
330
  "tx_ac": "NM_003390.3",
331
+ "tx_status": "longest_compatible_remaining",
324
332
  "strand": 1,
325
333
  "seg_start": {
326
334
  "exon_ord": 1,
@@ -332,6 +340,7 @@ def wee1_exon2_exon11():
332
340
  "refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
333
341
  },
334
342
  "start": 9576092,
343
+ "extensions": [{"name": "is_exonic", "value": True}],
335
344
  },
336
345
  },
337
346
  "seg_end": {
@@ -344,6 +353,7 @@ def wee1_exon2_exon11():
344
353
  "refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
345
354
  },
346
355
  "end": 9588449,
356
+ "extensions": [{"name": "is_exonic", "value": True}],
347
357
  },
348
358
  },
349
359
  }
@@ -357,6 +367,7 @@ def mane_wee1_exon2_exon11():
357
367
  "gene": "WEE1",
358
368
  "genomic_ac": "NC_000011.10",
359
369
  "tx_ac": "NM_003390.4",
370
+ "tx_status": "mane_select",
360
371
  "strand": 1,
361
372
  "seg_start": {
362
373
  "exon_ord": 1,
@@ -368,6 +379,7 @@ def mane_wee1_exon2_exon11():
368
379
  "refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
369
380
  },
370
381
  "start": 9576092,
382
+ "extensions": [{"name": "is_exonic", "value": True}],
371
383
  },
372
384
  },
373
385
  "seg_end": {
@@ -380,6 +392,7 @@ def mane_wee1_exon2_exon11():
380
392
  "refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
381
393
  },
382
394
  "end": 9588449,
395
+ "extensions": [{"name": "is_exonic", "value": True}],
383
396
  },
384
397
  },
385
398
  }
@@ -393,6 +406,7 @@ def ntrk1_exon10_exon17():
393
406
  "gene": "NTRK1",
394
407
  "genomic_ac": "NC_000001.11",
395
408
  "tx_ac": "NM_002529.3",
409
+ "tx_status": "longest_compatible_remaining",
396
410
  "strand": 1,
397
411
  "seg_start": {
398
412
  "exon_ord": 9,
@@ -404,6 +418,7 @@ def ntrk1_exon10_exon17():
404
418
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
405
419
  },
406
420
  "start": 156874570,
421
+ "extensions": [{"name": "is_exonic", "value": True}],
407
422
  },
408
423
  },
409
424
  "seg_end": {
@@ -416,6 +431,7 @@ def ntrk1_exon10_exon17():
416
431
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
417
432
  },
418
433
  "end": 156881850,
434
+ "extensions": [{"name": "is_exonic", "value": True}],
419
435
  },
420
436
  },
421
437
  }
@@ -429,6 +445,7 @@ def zbtb10_exon3_end():
429
445
  "gene": "ZBTB10",
430
446
  "genomic_ac": "NC_000008.11",
431
447
  "tx_ac": "NM_001105539.3",
448
+ "tx_status": "mane_select",
432
449
  "strand": 1,
433
450
  "seg_start": None,
434
451
  "seg_end": {
@@ -441,8 +458,8 @@ def zbtb10_exon3_end():
441
458
  "refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
442
459
  },
443
460
  "end": 80514010,
461
+ "extensions": [{"name": "is_exonic", "value": False}],
444
462
  },
445
- "is_exonic": False,
446
463
  },
447
464
  }
448
465
  return GenomicTxSegService(**params)
@@ -455,6 +472,7 @@ def zbtb10_exon5_start():
455
472
  "gene": "ZBTB10",
456
473
  "genomic_ac": "NC_000008.11",
457
474
  "tx_ac": "NM_001105539.3",
475
+ "tx_status": "mane_select",
458
476
  "strand": 1,
459
477
  "seg_start": {
460
478
  "exon_ord": 4,
@@ -466,8 +484,8 @@ def zbtb10_exon5_start():
466
484
  "refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
467
485
  },
468
486
  "start": 80518580,
487
+ "extensions": [{"name": "is_exonic", "value": False}],
469
488
  },
470
- "is_exonic": False,
471
489
  },
472
490
  "seg_end": None,
473
491
  }
@@ -481,6 +499,7 @@ def tpm3_exon6_end():
481
499
  "gene": "TPM3",
482
500
  "genomic_ac": "NC_000001.11",
483
501
  "tx_ac": "NM_152263.4",
502
+ "tx_status": "mane_select",
484
503
  "strand": -1,
485
504
  "seg_start": None,
486
505
  "seg_end": {
@@ -493,8 +512,8 @@ def tpm3_exon6_end():
493
512
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
494
513
  },
495
514
  "start": 154171410,
515
+ "extensions": [{"name": "is_exonic", "value": False}],
496
516
  },
497
- "is_exonic": False,
498
517
  },
499
518
  }
500
519
  return GenomicTxSegService(**params)
@@ -507,6 +526,7 @@ def tpm3_exon5_start():
507
526
  "gene": "TPM3",
508
527
  "genomic_ac": "NC_000001.11",
509
528
  "tx_ac": "NM_152263.4",
529
+ "tx_status": "mane_select",
510
530
  "strand": -1,
511
531
  "seg_start": {
512
532
  "exon_ord": 4,
@@ -518,8 +538,8 @@ def tpm3_exon5_start():
518
538
  "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
519
539
  },
520
540
  "end": 154173080,
541
+ "extensions": [{"name": "is_exonic", "value": False}],
521
542
  },
522
- "is_exonic": False,
523
543
  },
524
544
  "seg_end": None,
525
545
  }
@@ -533,6 +553,7 @@ def gusbp3_exon2_end():
533
553
  "gene": "GUSBP3",
534
554
  "genomic_ac": "NC_000005.10",
535
555
  "tx_ac": "NR_027386.2",
556
+ "tx_status": "longest_compatible_remaining",
536
557
  "strand": -1,
537
558
  "seg_start": None,
538
559
  "seg_end": {
@@ -545,8 +566,8 @@ def gusbp3_exon2_end():
545
566
  "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
546
567
  },
547
568
  "start": 69680764,
569
+ "extensions": [{"name": "is_exonic", "value": False}],
548
570
  },
549
- "is_exonic": False,
550
571
  },
551
572
  }
552
573
  return GenomicTxSegService(**params)
@@ -559,6 +580,7 @@ def eln_grch38_intronic():
559
580
  "gene": "ELN",
560
581
  "genomic_ac": "NC_000007.14",
561
582
  "tx_ac": "NM_000501.4",
583
+ "tx_status": "mane_select",
562
584
  "strand": 1,
563
585
  "seg_start": {
564
586
  "exon_ord": 0,
@@ -570,8 +592,8 @@ def eln_grch38_intronic():
570
592
  "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
571
593
  },
572
594
  "start": 74028173,
595
+ "extensions": [{"name": "is_exonic", "value": True}],
573
596
  },
574
- "is_exonic": True,
575
597
  },
576
598
  "seg_end": {
577
599
  "exon_ord": 7,
@@ -583,8 +605,8 @@ def eln_grch38_intronic():
583
605
  "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
584
606
  },
585
607
  "end": 74043599,
608
+ "extensions": [{"name": "is_exonic", "value": False}],
586
609
  },
587
- "is_exonic": False,
588
610
  },
589
611
  }
590
612
  return GenomicTxSegService(**params)
@@ -597,6 +619,7 @@ def gusbp3_exon5_start():
597
619
  "gene": "GUSBP3",
598
620
  "genomic_ac": "NC_000005.10",
599
621
  "tx_ac": "NR_027386.2",
622
+ "tx_status": "longest_compatible_remaining",
600
623
  "strand": -1,
601
624
  "seg_start": {
602
625
  "exon_ord": 4,
@@ -608,8 +631,8 @@ def gusbp3_exon5_start():
608
631
  "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
609
632
  },
610
633
  "end": 69645878,
634
+ "extensions": [{"name": "is_exonic", "value": False}],
611
635
  },
612
- "is_exonic": False,
613
636
  },
614
637
  "seg_end": None,
615
638
  }
@@ -639,6 +662,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
639
662
  assert actual.gene == expected.gene
640
663
  assert actual.genomic_ac == expected.genomic_ac
641
664
  assert actual.tx_ac == expected.tx_ac
665
+ assert actual.tx_status == expected.tx_status
642
666
  assert actual.strand == expected.strand
643
667
 
644
668
  for seg_attr in ["seg_start", "seg_end"]:
@@ -660,13 +684,17 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
660
684
  assert (
661
685
  actual_seg.genomic_location.end == expected_seg.genomic_location.end
662
686
  )
663
- assert actual_seg.is_exonic == expected_seg.is_exonic
687
+ assert (
688
+ actual_seg.genomic_location.extensions
689
+ == expected_seg.genomic_location.extensions
690
+ )
664
691
 
665
692
  assert actual.errors == expected.errors
666
693
  else:
667
694
  assert actual.gene is None
668
695
  assert actual.genomic_ac is None
669
696
  assert actual.tx_ac is None
697
+ assert actual.tx_status is None
670
698
  assert actual.strand is None
671
699
  assert actual.seg_start is None
672
700
  assert actual.seg_end is None
@@ -711,6 +739,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
711
739
  assert actual.gene == expected.gene
712
740
  assert actual.genomic_ac == expected.genomic_ac
713
741
  assert actual.tx_ac == expected.tx_ac
742
+ assert actual.tx_status == expected.tx_status
714
743
  assert actual.strand == expected.strand
715
744
 
716
745
  expected_seg = expected.seg
@@ -728,13 +757,17 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
728
757
  actual_seg.genomic_location.start == expected_seg.genomic_location.start
729
758
  )
730
759
  assert actual_seg.genomic_location.end == expected_seg.genomic_location.end
731
- assert actual_seg.is_exonic == expected_seg.is_exonic
760
+ assert (
761
+ actual_seg.genomic_location.extensions
762
+ == expected_seg.genomic_location.extensions
763
+ )
732
764
 
733
765
  assert actual.errors == expected.errors
734
766
  else:
735
767
  assert actual.gene is None
736
768
  assert actual.genomic_ac is None
737
769
  assert actual.tx_ac is None
770
+ assert actual.tx_status is None
738
771
  assert actual.strand is None
739
772
  assert actual.seg is None
740
773
  assert len(actual.errors) > 0
@@ -1038,6 +1071,17 @@ async def test_genomic_to_transcript_fusion_context(
1038
1071
  resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
1039
1072
  genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
1040
1073
 
1074
+ # Test case where gene does not have a MANE transcript. We are looking
1075
+ # to check that the same transcript accession is returned across runs
1076
+ inputs = {
1077
+ "genomic_ac": "NC_000001.11",
1078
+ "seg_end_genomic": 156421555,
1079
+ "gene": "MIR9-1HG",
1080
+ }
1081
+ resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
1082
+ assert resp.tx_ac == "NM_001320454.2"
1083
+ assert resp.tx_status == "longest_compatible_remaining"
1084
+
1041
1085
 
1042
1086
  @pytest.mark.asyncio
1043
1087
  async def test_get_alt_ac_start_and_end(
@@ -5,7 +5,7 @@ from unittest.mock import patch
5
5
  import polars as pl
6
6
  import pytest
7
7
 
8
- from cool_seq_tool.schemas import ManeGeneData
8
+ from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
9
9
 
10
10
 
11
11
  @pytest.fixture(scope="module")
@@ -168,6 +168,15 @@ def test_get_mane_from_transcripts(
168
168
  assert resp == []
169
169
 
170
170
 
171
def test_get_transcript_status(test_mane_transcript_mappings):
    """Check the priority that ``get_transcript_status`` reports per accession.

    ``NM_152263.4`` is expected to be reported as MANE Select, while
    ``NM_152263.3`` is expected to be reported as longest compatible
    remaining.
    """
    expected_by_accession = (
        ("NM_152263.4", TranscriptPriority.MANE_SELECT),
        ("NM_152263.3", TranscriptPriority.LONGEST_COMPATIBLE_REMAINING),
    )
    for tx_ac, expected_status in expected_by_accession:
        status = test_mane_transcript_mappings.get_transcript_status(tx_ac)
        assert status == expected_status
178
+
179
+
171
180
  def test_get_mane_data_from_chr_pos(
172
181
  test_mane_transcript_mappings, braf_select, braf_plus_clinical
173
182
  ):
@@ -1,11 +1,14 @@
1
1
  """Test UTA data source."""
2
2
 
3
+ from urllib.parse import urlparse
4
+
3
5
  import pytest
4
6
 
5
7
  from cool_seq_tool.schemas import Strand
6
8
  from cool_seq_tool.sources.uta_database import (
7
9
  GenomicTxData,
8
10
  GenomicTxMetadata,
11
+ ParseResult,
9
12
  TxExonAlnData,
10
13
  )
11
14
 
@@ -360,3 +363,98 @@ async def test_get_mane_transcripts_from_genomic_pos(test_db):
360
363
  # invalid ac
361
364
  resp = await test_db.get_transcripts_from_genomic_pos("NC_000007.14232", 140753336)
362
365
  assert resp == []
366
+
367
+
368
@pytest.mark.parametrize(
    ("raw_url", "expected"),
    [
        # Username + password
        (
            "postgresql://user:pass@localhost:5432/dbname",
            {
                "scheme": "postgresql",
                "username": "user",
                "password": "pass",
                "hostname": "localhost",
                "port": 5432,
                "database": "dbname",
                "sanitized_url": "postgresql://user:***@localhost:5432/dbname",
            },
        ),
        # Username without a password component
        (
            "postgresql://user@localhost/dbname",
            {
                "scheme": "postgresql",
                "username": "user",
                "password": None,
                "hostname": "localhost",
                "port": None,
                "database": "dbname",
                "sanitized_url": "postgresql://user@localhost/dbname",
            },
        ),
        # Password is "0" (falsy-looking, but still a real password to mask)
        (
            "postgresql://user:0@localhost/dbname",
            {
                "scheme": "postgresql",
                "username": "user",
                "password": "0",
                "hostname": "localhost",
                "port": None,
                "database": "dbname",
                "sanitized_url": "postgresql://user:***@localhost/dbname",
            },
        ),
        # Empty password
        (
            "postgresql://user:@localhost/dbname",
            {
                "scheme": "postgresql",
                "username": "user",
                "password": "",
                "hostname": "localhost",
                "port": None,
                "database": "dbname",
                "sanitized_url": "postgresql://user@localhost/dbname",
            },
        ),
        # No username
        (
            "postgresql://localhost:5432/dbname",
            {
                "scheme": "postgresql",
                "username": None,
                "password": None,
                "hostname": "localhost",
                "port": 5432,
                "database": "dbname",
                "sanitized_url": "postgresql://localhost:5432/dbname",
            },
        ),
        # With query string and fragment
        (
            "postgresql://user:secret@localhost/dbname?query#fragment",
            {
                "scheme": "postgresql",
                "username": "user",
                "password": "secret",
                "hostname": "localhost",
                "port": None,
                "database": "dbname",
                "sanitized_url": "postgresql://user:***@localhost/dbname?query#fragment",
            },
        ),
    ],
)
def test_parsed_url(raw_url, expected):
    """Check the URL components and sanitized URL exposed by ``ParseResult``.

    Each case wraps ``urlparse(raw_url)`` in ``ParseResult`` and compares the
    scheme, credentials, host, port, database name, and the sanitized URL
    (non-empty password shown as ``***``) against the expected values.

    :param raw_url: database connection URL to parse
    :param expected: mapping of ``ParseResult`` attribute name to expected value
    """
    # Deliberately a plain (non-async) test: nothing here is awaited, and an
    # unmarked coroutine test is not executed unless an async plugin is
    # configured to pick it up, which would leave these assertions unchecked.
    parsed_result = ParseResult(urlparse(raw_url))

    assert parsed_result.scheme == expected["scheme"]
    assert parsed_result.username == expected["username"]
    assert parsed_result.password == expected["password"]
    assert parsed_result.hostname == expected["hostname"]
    assert parsed_result.port == expected["port"]
    assert parsed_result.database == expected["database"]
    assert parsed_result.sanitized_url == expected["sanitized_url"]
File without changes
File without changes
File without changes