cool-seq-tool 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -87,7 +87,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
87
87
  """Model for representing a boundary for a transcript segment."""
88
88
 
89
89
  seg: TxSegment | None = Field(None, description="Transcript segment.")
90
- gene: StrictStr | None = Field(None, description="HGNC gene symbol.")
90
+ gene: StrictStr | None = Field(
91
+ None, description="Valid, case-sensitive HGNC gene symbol."
92
+ )
91
93
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
92
94
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
93
95
  errors: list[StrictStr] = Field([], description="Error messages.")
@@ -139,7 +141,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
139
141
  class GenomicTxSegService(BaseModelForbidExtra):
140
142
  """Service model for genomic and transcript data."""
141
143
 
142
- gene: StrictStr | None = Field(None, description="HGNC gene symbol.")
144
+ gene: StrictStr | None = Field(
145
+ None, description="Valid, case-sensitive HGNC gene symbol."
146
+ )
143
147
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
144
148
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
145
149
  seg_start: TxSegment | None = Field(None, description="Start transcript segment.")
@@ -292,7 +296,7 @@ class ExonGenomicCoordsMapper:
292
296
  ('NC_000001.11', 154192135, 154170399)
293
297
 
294
298
  :param transcript: RefSeq transcript accession
295
- :param gene: HGNC gene symbol
299
+ :param gene: Valid, case-sensitive HGNC gene symbol
296
300
  :param exon_start: Starting transcript exon number (1-based). If not provided,
297
301
  must provide ``exon_end``
298
302
  :param exon_start_offset: Starting exon offset
@@ -335,9 +339,6 @@ class ExonGenomicCoordsMapper:
335
339
  if errors:
336
340
  return _return_service_errors(errors)
337
341
 
338
- if gene:
339
- gene = gene.upper()
340
-
341
342
  # Get aligned genomic data (hgnc gene, alt_ac, alt_start_i, alt_end_i, strand)
342
343
  # for exon(s)
343
344
  (
@@ -455,7 +456,7 @@ class ExonGenomicCoordsMapper:
455
456
  following the breakpoint for the 3' end. For the negative strand, adjacent
456
457
  is defined as the exon following the breakpoint for the 5' end and the exon
457
458
  preceding the breakpoint for the 3' end.
458
- :param gene: gene name. Ideally, HGNC symbol. Must be given if no ``transcript``
459
+ :param gene: A valid, case-sensitive HGNC symbol. Must be given if no ``transcript``
459
460
  value is provided.
460
461
  :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
461
462
  ``seg_end_genomic``
@@ -473,9 +474,6 @@ class ExonGenomicCoordsMapper:
473
474
  if errors:
474
475
  return _return_service_errors(errors)
475
476
 
476
- if gene is not None:
477
- gene = gene.upper()
478
-
479
477
  params = {}
480
478
 
481
479
  if seg_start_genomic:
@@ -630,7 +628,7 @@ class ExonGenomicCoordsMapper:
630
628
  must provide ``tx_exon_end``
631
629
  :param tx_exon_end: Transcript's exon end coordinates. If not provided, must
632
630
  provide ``tx_exon_start``
633
- :param gene: HGNC gene symbol
631
+ :param gene: A valid, case-sensitive HGNC gene symbol
634
632
  :return: Tuple containing aligned genomic data for start and end exon and
635
633
  warnings if found
636
634
  """
@@ -755,7 +753,7 @@ class ExonGenomicCoordsMapper:
755
753
  :param transcript: The transcript to use. If this is not given, we will try the
756
754
  following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
757
755
  Compatible Transcript
758
- :param gene: HGNC gene symbol
756
+ :param gene: Valid, case-sensitive HGNC gene symbol
759
757
  :param get_nearest_transcript_junction: If ``True``, this will return the
760
758
  adjacent exon if the position specified by ``seg_start_genomic`` or
761
759
  ``seg_end_genomic`` does not occur on an exon. For the positive strand, adjacent
@@ -1062,7 +1060,7 @@ class ExonGenomicCoordsMapper:
1062
1060
  :param genomic_ac: Genomic RefSeq accession
1063
1061
  :param genomic_pos: Genomic position where the transcript segment occurs
1064
1062
  :param is_seg_start: Whether or not ``genomic_pos`` represents the start position.
1065
- :param gene: HGNC gene symbol
1063
+ :param gene: Valid, case-sensitive HGNC gene symbol
1066
1064
  :param tx_ac: Transcript RefSeq accession. If not provided, will use MANE
1067
1065
  transcript
1068
1066
  :return: Transcript segment data and associated genomic metadata
@@ -1171,14 +1169,32 @@ class ExonGenomicCoordsMapper:
1171
1169
  :param end: Genomic coordinate of breakpoint
1172
1170
  :return: Exon number corresponding to adjacent exon. Will be 0-based
1173
1171
  """
1174
- for i in range(len(tx_exons_genomic_coords) - 1):
1172
+ # If a transcript has only one exon, return 0
1173
+ if len(tx_exons_genomic_coords) == 1:
1174
+ return 0
1175
+
1176
+ # Check if a breakpoint occurs before/after the transcript boundaries
1177
+ bp = start if start else end
1178
+ exon_list_len = len(tx_exons_genomic_coords) - 1
1179
+
1180
+ if strand == Strand.POSITIVE:
1181
+ if bp < tx_exons_genomic_coords[0].alt_start_i:
1182
+ return 0
1183
+ if bp > tx_exons_genomic_coords[exon_list_len].alt_end_i:
1184
+ return exon_list_len
1185
+ if strand == Strand.NEGATIVE:
1186
+ if bp > tx_exons_genomic_coords[0].alt_end_i:
1187
+ return 0
1188
+ if bp < tx_exons_genomic_coords[exon_list_len].alt_start_i:
1189
+ return exon_list_len
1190
+
1191
+ for i in range(exon_list_len):
1175
1192
  exon = tx_exons_genomic_coords[i]
1176
1193
  if start == exon.alt_start_i:
1177
1194
  break
1178
1195
  if end == exon.alt_end_i:
1179
1196
  break
1180
1197
  next_exon = tx_exons_genomic_coords[i + 1]
1181
- bp = start if start else end
1182
1198
  if strand == Strand.POSITIVE:
1183
1199
  lte_exon = exon
1184
1200
  gte_exon = next_exon
@@ -1187,6 +1203,7 @@ class ExonGenomicCoordsMapper:
1187
1203
  gte_exon = exon
1188
1204
  if bp >= lte_exon.alt_end_i and bp <= gte_exon.alt_start_i:
1189
1205
  break
1206
+
1190
1207
  # Return current exon if end position is provided, next exon if start position
1191
1208
  # is provided.
1192
1209
  return exon.ord if end else exon.ord + 1
@@ -61,7 +61,9 @@ class ManeTranscriptMappings:
61
61
  location information). The list is sorted so that a MANE Select entry comes
62
62
  first, followed by a MANE Plus Clinical entry, if available.
63
63
  """
64
- data = self.df.filter(pl.col("symbol") == gene_symbol.upper())
64
+ data = self.df.filter(
65
+ pl.col("symbol").str.to_uppercase() == gene_symbol.upper()
66
+ )
65
67
 
66
68
  if len(data) == 0:
67
69
  _logger.warning(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cool_seq_tool
3
- Version: 0.7.1
3
+ Version: 0.9.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -47,39 +47,39 @@ License-File: LICENSE
47
47
  Requires-Dist: asyncpg
48
48
  Requires-Dist: aiofiles
49
49
  Requires-Dist: boto3
50
- Requires-Dist: agct >=0.1.0-dev1
51
- Requires-Dist: polars ~=1.0
50
+ Requires-Dist: agct>=0.1.0-dev1
51
+ Requires-Dist: polars~=1.0
52
52
  Requires-Dist: hgvs
53
53
  Requires-Dist: biocommons.seqrepo
54
- Requires-Dist: pydantic ==2.*
55
- Requires-Dist: ga4gh.vrs ~=2.0.0a10
56
- Requires-Dist: wags-tails ~=0.1.3
54
+ Requires-Dist: pydantic==2.*
55
+ Requires-Dist: ga4gh.vrs~=2.0.0a10
56
+ Requires-Dist: wags-tails~=0.2.2
57
57
  Requires-Dist: bioutils
58
58
  Provides-Extra: dev
59
- Requires-Dist: pre-commit >=3.7.1 ; extra == 'dev'
60
- Requires-Dist: ipython ; extra == 'dev'
61
- Requires-Dist: ipykernel ; extra == 'dev'
62
- Requires-Dist: psycopg2-binary ; extra == 'dev'
63
- Requires-Dist: ruff ==0.5.0 ; extra == 'dev'
59
+ Requires-Dist: pre-commit>=3.7.1; extra == "dev"
60
+ Requires-Dist: ipython; extra == "dev"
61
+ Requires-Dist: ipykernel; extra == "dev"
62
+ Requires-Dist: psycopg2-binary; extra == "dev"
63
+ Requires-Dist: ruff==0.5.0; extra == "dev"
64
+ Provides-Extra: tests
65
+ Requires-Dist: pytest; extra == "tests"
66
+ Requires-Dist: pytest-cov; extra == "tests"
67
+ Requires-Dist: pytest-asyncio==0.18.3; extra == "tests"
68
+ Requires-Dist: mock; extra == "tests"
64
69
  Provides-Extra: docs
65
- Requires-Dist: sphinx ==6.1.3 ; extra == 'docs'
66
- Requires-Dist: sphinx-autodoc-typehints ==1.22.0 ; extra == 'docs'
67
- Requires-Dist: sphinx-autobuild ==2021.3.14 ; extra == 'docs'
68
- Requires-Dist: sphinx-copybutton ==0.5.2 ; extra == 'docs'
69
- Requires-Dist: sphinxext-opengraph ==0.8.2 ; extra == 'docs'
70
- Requires-Dist: furo ==2023.3.27 ; extra == 'docs'
71
- Requires-Dist: sphinx-github-changelog ==1.2.1 ; extra == 'docs'
72
- Provides-Extra: test
73
- Requires-Dist: pytest ; extra == 'test'
74
- Requires-Dist: pytest-cov ; extra == 'test'
75
- Requires-Dist: pytest-asyncio ==0.18.3 ; extra == 'test'
76
- Requires-Dist: mock ; extra == 'test'
70
+ Requires-Dist: sphinx==6.1.3; extra == "docs"
71
+ Requires-Dist: sphinx-autodoc-typehints==1.22.0; extra == "docs"
72
+ Requires-Dist: sphinx-autobuild==2021.3.14; extra == "docs"
73
+ Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
74
+ Requires-Dist: sphinxext-opengraph==0.8.2; extra == "docs"
75
+ Requires-Dist: furo==2023.3.27; extra == "docs"
76
+ Requires-Dist: sphinx-github-changelog==1.2.1; extra == "docs"
77
77
 
78
78
  <h1 align="center">
79
- CoolSeqTool
79
+ Cool-Seq-Tool
80
80
  </h1>
81
81
 
82
- [![image](https://img.shields.io/pypi/v/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/l/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/pyversions/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![Actions status](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
82
+ [![image](https://img.shields.io/pypi/v/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14007783.svg)](https://doi.org/10.5281/zenodo.14007783) [![image](https://img.shields.io/pypi/l/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/pyversions/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![Actions status](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
83
83
 
84
84
  ---
85
85
 
@@ -90,18 +90,18 @@ CoolSeqTool
90
90
  ## Overview
91
91
 
92
92
  <!-- description -->
93
- The **CoolSeqTool** provides:
93
+ The Common Operations On Lots-Of Sequences Tool, **Cool-Seq-Tool**, provides:
94
94
 
95
- - A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and the [Universal Transcript Archive](https://github.com/biocommons/uta)
96
- - Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
97
- - Mapping tools that combine the above to support translation between references sequences, annotation layers, and MANE transcripts
95
+ - A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and transcript alignment data from the [Universal Transcript Archive](https://github.com/biocommons/uta)
96
+ - Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
97
+ - Mapping tools, including a transcript selection algorithm for selecting a representative transcript defined [here](https://coolseqtool.readthedocs.io/stable/transcript_selection.html), that combine the above to support translation between reference sequences, annotation layers, and transcripts
98
98
  <!-- /description -->
99
99
 
100
100
  ---
101
101
 
102
102
  ## Install
103
103
 
104
- CoolSeqTool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
104
+ Cool-Seq-Tool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
105
105
 
106
106
  ```shell
107
107
  python3 -m pip install cool-seq-tool
@@ -113,7 +113,7 @@ See the [installation instructions](https://coolseqtool.readthedocs.io/stable/in
113
113
 
114
114
  ## Usage
115
115
 
116
- All CoolSeqTool resources can be initialized by way of a top-level class instance:
116
+ All Cool-Seq-Tool resources can be initialized by way of a top-level class instance:
117
117
 
118
118
  ```pycon
119
119
  >>> from cool_seq_tool import CoolSeqTool
@@ -6,7 +6,7 @@ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2p
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
7
7
  cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
8
8
  cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=lfmzuVXaYT7w2FBDS3xhJNgETusllomFy5Utzhfhlpc,48782
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=XYHWYHL9PcBIKHB_EsN1YKwmhP-KLrGyZv8yH_7huuo,49533
10
10
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
11
11
  cool_seq_tool/mappers/mane_transcript.py,sha256=nirxlf3EGVInFYG4fsAqiEmDdTc_h1XuPyX2ul-a7Rk,54368
12
12
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
14
14
  cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
15
15
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
16
16
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
- cool_seq_tool/sources/mane_transcript_mappings.py,sha256=E_pj7FEBcB6HUR8yhSVibB0beMMlKJ62pK0qvl4y5nw,5358
17
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=Q6J57O2lLWXlgKT0zq3BIwkwFawySnORHOX-UxzfyDE,5399
18
18
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
19
  cool_seq_tool/sources/uta_database.py,sha256=gc5wsKOIhvzhwFmPmqOY0hhaVfRkRSzYNa9tpBt81_U,35017
20
- cool_seq_tool-0.7.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
- cool_seq_tool-0.7.1.dist-info/METADATA,sha256=Y9_RZI2iHpmNOFwXoFCCKyHs6aXmNrzKQfyHkmqUVmQ,6226
22
- cool_seq_tool-0.7.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
23
- cool_seq_tool-0.7.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
- cool_seq_tool-0.7.1.dist-info/RECORD,,
20
+ cool_seq_tool-0.9.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.9.0.dist-info/METADATA,sha256=GgGZqNTW98YoV49Reizkz2UnPq1MqJOR4jjDHlcWTDQ,6556
22
+ cool_seq_tool-0.9.0.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
23
+ cool_seq_tool-0.9.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.9.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.7.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5