cool-seq-tool 0.13.1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -75,9 +75,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
75
75
  f"Start inter-residue coordinate ({start}) is out of index on {ac}"
76
76
  )
77
77
  elif error.startswith("stop out of range"):
78
- msg = (
79
- f"End inter-residue coordinate ({end}) is out of " f"index on {ac}"
80
- )
78
+ msg = f"End inter-residue coordinate ({end}) is out of index on {ac}"
81
79
  else:
82
80
  msg = f"{e}"
83
81
  _logger.warning(msg)
@@ -93,6 +93,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
93
93
  )
94
94
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
95
95
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
96
+ strand: Strand | None = Field(
97
+ None, description="The strand that the transcript accession exists on."
98
+ )
96
99
  errors: list[StrictStr] = Field([], description="Error messages.")
97
100
 
98
101
  @model_validator(mode="before")
@@ -121,6 +124,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
121
124
  "gene": "TPM3",
122
125
  "genomic_ac": "NC_000001.11",
123
126
  "tx_ac": "NM_152263.3",
127
+ "strand": -1,
124
128
  "seg": {
125
129
  "exon_ord": 0,
126
130
  "offset": 0,
@@ -147,6 +151,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
147
151
  )
148
152
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
149
153
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
154
+ strand: Strand | None = Field(
155
+ None, description="The strand that the transcript exists on."
156
+ )
150
157
  seg_start: TxSegment | None = Field(None, description="Start transcript segment.")
151
158
  seg_end: TxSegment | None = Field(None, description="End transcript segment.")
152
159
  errors: list[StrictStr] = Field([], description="Error messages.")
@@ -183,6 +190,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
183
190
  "gene": "TPM3",
184
191
  "genomic_ac": "NC_000001.11",
185
192
  "tx_ac": "NM_152263.3",
193
+ "strand": -1,
186
194
  "seg_start": {
187
195
  "exon_ord": 0,
188
196
  "offset": 0,
@@ -400,6 +408,7 @@ class ExonGenomicCoordsMapper:
400
408
  gene=gene,
401
409
  genomic_ac=genomic_ac,
402
410
  tx_ac=transcript,
411
+ strand=strand,
403
412
  seg_start=seg_start,
404
413
  seg_end=seg_end,
405
414
  )
@@ -490,6 +499,7 @@ class ExonGenomicCoordsMapper:
490
499
  params["gene"] = start_tx_seg_data.gene
491
500
  params["genomic_ac"] = start_tx_seg_data.genomic_ac
492
501
  params["tx_ac"] = start_tx_seg_data.tx_ac
502
+ params["strand"] = start_tx_seg_data.strand
493
503
  params["seg_start"] = start_tx_seg_data.seg
494
504
  else:
495
505
  start_tx_seg_data = None
@@ -524,6 +534,7 @@ class ExonGenomicCoordsMapper:
524
534
  params["gene"] = end_tx_seg_data.gene
525
535
  params["genomic_ac"] = end_tx_seg_data.genomic_ac
526
536
  params["tx_ac"] = end_tx_seg_data.tx_ac
537
+ params["strand"] = end_tx_seg_data.strand
527
538
 
528
539
  params["seg_end"] = end_tx_seg_data.seg
529
540
 
@@ -765,7 +776,7 @@ class ExonGenomicCoordsMapper:
765
776
  GRCh38 by default. Will attempt to liftover if starting assembly is GRCh37
766
777
  :return: Data for a transcript segment boundary (inter-residue coordinates)
767
778
  """
768
- params = {key: None for key in GenomicTxSeg.model_fields}
779
+ params = dict.fromkeys(GenomicTxSeg.model_fields)
769
780
 
770
781
  # Validate inputs exist in UTA
771
782
  if gene:
@@ -865,16 +876,21 @@ class ExonGenomicCoordsMapper:
865
876
  if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
866
877
  genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
867
878
 
868
- # Validate that the breakpoint between the first and last exon for the selected transcript
879
+ # Validate that the breakpoint occurs within 150 bp of the first and last exon for the selected transcript.
880
+ # A breakpoint beyond this range is likely erroneous.
869
881
  coordinate_check = await self._validate_genomic_breakpoint(
870
882
  pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
871
883
  )
872
884
  if not coordinate_check:
873
- return GenomicTxSeg(
874
- errors=[
875
- f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
876
- ]
885
+ msg = (
886
+ f"{genomic_pos} on {genomic_ac} occurs more than 150 bp outside the "
887
+ f"exon boundaries of the {transcript} transcript, indicating this may not "
888
+ f"be a chimeric transcript junction and is unlikely to represent a "
889
+ f"contiguous coding sequence. Confirm that the genomic position "
890
+ f"{genomic_pos} is being used to represent transcript junction and not "
891
+ f"DNA breakpoint."
877
892
  )
893
+ _logger.warning(msg)
878
894
 
879
895
  # Check if breakpoint occurs on an exon.
880
896
  # If not, determine the adjacent exon given the selected transcript
@@ -913,6 +929,7 @@ class ExonGenomicCoordsMapper:
913
929
  gene=gene,
914
930
  genomic_ac=genomic_ac,
915
931
  tx_ac=transcript,
932
+ strand=strand,
916
933
  seg=TxSegment(
917
934
  exon_ord=exon_num,
918
935
  offset=offset,
@@ -955,8 +972,9 @@ class ExonGenomicCoordsMapper:
955
972
  :param pos: Genomic position on ``genomic_ac``
956
973
  :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
957
974
  :param transcript: A transcript accession
958
- :return: ``True`` if the coordinate falls within the first and last exon
959
- for the transcript, ``False`` if not
975
+ :return: ``True`` if the coordinate falls within 150bp of the first and last exon
976
+ for the transcript, ``False`` if not. Breakpoints past this threshold
977
+ are likely erroneous.
960
978
  """
961
979
  query = f"""
962
980
  WITH tx_boundaries AS (
@@ -968,7 +986,7 @@ class ExonGenomicCoordsMapper:
968
986
  AND alt_ac = '{genomic_ac}'
969
987
  )
970
988
  SELECT * FROM tx_boundaries
971
- WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
989
+ WHERE {pos} between (tx_boundaries.min_start - 150) and (tx_boundaries.max_end + 150)
972
990
  """ # noqa: S608
973
991
  results = await self.uta_db.execute_query(query)
974
992
  return bool(results)
@@ -268,7 +268,7 @@ class ManeTranscript:
268
268
  query = f"""
269
269
  SELECT alt_ac
270
270
  FROM {self.uta_db.schema}.genomic
271
- WHERE alt_ac LIKE '{genomic_tx_data.alt_ac.split('.')[0]}%'
271
+ WHERE alt_ac LIKE '{genomic_tx_data.alt_ac.split(".")[0]}%'
272
272
  {order_by_cond}
273
273
  """ # noqa: S608
274
274
  nc_acs = await self.uta_db.execute_query(query)
@@ -221,7 +221,7 @@ class UtaDatabase:
221
221
  WHERE table_schema = '{self.schema}'
222
222
  AND table_name = 'genomic'
223
223
  );
224
- """
224
+ """ # noqa: S608
225
225
  genomic_table_exists = await self.execute_query(check_table_exists)
226
226
  genomic_table_exists = genomic_table_exists[0].get("exists")
227
227
  if genomic_table_exists is None:
@@ -250,7 +250,7 @@ class UtaDatabase:
250
250
  LEFT JOIN {self.schema}.exon_aln ea ON
251
251
  (((te.exon_id = ea.tx_exon_id) AND
252
252
  (ae.exon_id = ea.alt_exon_id))));
253
- """
253
+ """ # noqa: S608
254
254
  await self.execute_query(create_genomic_table)
255
255
 
256
256
  indexes = [
@@ -325,13 +325,13 @@ class UtaDatabase:
325
325
  cds_start_end = await self.execute_query(query)
326
326
  if cds_start_end:
327
327
  cds_start_end = cds_start_end[0]
328
- if cds_start_end[0] is not None and cds_start_end[1] is not None: # noqa: RET503
328
+ if cds_start_end[0] is not None and cds_start_end[1] is not None:
329
329
  return cds_start_end[0], cds_start_end[1]
330
330
  else:
331
331
  _logger.warning(
332
332
  "Unable to get coding start/end site for accession: %s", tx_ac
333
333
  )
334
- return None
334
+ return None
335
335
 
336
336
  async def get_newest_assembly_ac(self, ac: str) -> list[str]:
337
337
  """Find accession associated to latest genomic assembly
@@ -352,7 +352,7 @@ class UtaDatabase:
352
352
  query = f"""
353
353
  SELECT ac
354
354
  FROM {self.schema}._seq_anno_most_recent
355
- WHERE ac LIKE '{ac.split('.')[0]}%'
355
+ WHERE ac LIKE '{ac.split(".")[0]}%'
356
356
  AND ((descr IS NULL) OR (descr = ''))
357
357
  {order_by_cond}
358
358
  """ # noqa: S608
@@ -499,7 +499,7 @@ class UtaDatabase:
499
499
  AND {start_pos} BETWEEN {pos_q}
500
500
  AND {end_pos} BETWEEN {pos_q}
501
501
  {order_by_cond}
502
- """
502
+ """ # noqa: S608
503
503
  result = await self.execute_query(query)
504
504
  if not result:
505
505
  _logger.warning("Unable to find transcript alignment for query: %s", query)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.13.1
3
+ Version: 0.14.1
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -51,16 +51,16 @@ Requires-Dist: agct>=0.1.0-dev1
51
51
  Requires-Dist: polars~=1.0
52
52
  Requires-Dist: hgvs
53
53
  Requires-Dist: biocommons.seqrepo
54
- Requires-Dist: pydantic==2.*
55
- Requires-Dist: ga4gh.vrs~=2.0.0a10
54
+ Requires-Dist: pydantic<3.0,>=2.0
55
+ Requires-Dist: ga4gh.vrs<3.0,>=2.1.3
56
56
  Requires-Dist: wags-tails~=0.3.2
57
57
  Requires-Dist: bioutils
58
58
  Provides-Extra: dev
59
- Requires-Dist: pre-commit>=4.0.1; extra == "dev"
59
+ Requires-Dist: pre-commit>=4.2.0; extra == "dev"
60
60
  Requires-Dist: ipython; extra == "dev"
61
61
  Requires-Dist: ipykernel; extra == "dev"
62
62
  Requires-Dist: psycopg2-binary; extra == "dev"
63
- Requires-Dist: ruff==0.8.6; extra == "dev"
63
+ Requires-Dist: ruff==0.12.1; extra == "dev"
64
64
  Provides-Extra: tests
65
65
  Requires-Dist: pytest; extra == "tests"
66
66
  Requires-Dist: pytest-cov; extra == "tests"
@@ -3,12 +3,12 @@ cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
3
3
  cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
4
4
  cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
- cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
6
+ cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
7
7
  cool_seq_tool/mappers/__init__.py,sha256=4_YNwNyw_QrlhRNu1nly8Dezv81XjCIiNa7crVXEh38,305
8
8
  cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=MPp1cMfaTYZRJ-T9cP8SvICJbTlTCL5Yze3J1VT-oQQ,43790
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=t36NhWo2Rl84dgZY6qO7XFmGpfisjAqC-1ZOTRZxWvg,44757
10
10
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
11
- cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
11
+ cool_seq_tool/mappers/mane_transcript.py,sha256=2cAYi0Y_gGdPI40weH9Ud1uVBCTKuwMT0M7KFUyjzU0,54365
12
12
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
13
13
  cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
14
14
  cool_seq_tool/resources/status.py,sha256=9LYSO2mOzVmoSQwllzq1mGChjtDA6j3I0S372N89clA,5683
@@ -16,9 +16,9 @@ cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5p
16
16
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
17
  cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
18
18
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
- cool_seq_tool/sources/uta_database.py,sha256=UHFLeiuk8H29CF1tNjE8T22-QaPs_fDUaqQO6Hu18yg,36175
20
- cool_seq_tool-0.13.1.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
- cool_seq_tool-0.13.1.dist-info/METADATA,sha256=ZhMjXeb_uT0a9hITKtjhUzVDJtlCJKLLPpOf2Cg8GpI,6579
22
- cool_seq_tool-0.13.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
23
- cool_seq_tool-0.13.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
- cool_seq_tool-0.13.1.dist-info/RECORD,,
19
+ cool_seq_tool/sources/uta_database.py,sha256=zzRzmYuybqzEg7zeuQjhK46SPK5GfbiWWNRGNJju8AI,36197
20
+ cool_seq_tool-0.14.1.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.14.1.dist-info/METADATA,sha256=TZHF8lDyidXRdGWUoBWfYAToUdQ7H6yO-gkpUhBdOe4,6587
22
+ cool_seq_tool-0.14.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ cool_seq_tool-0.14.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.14.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5