cool-seq-tool 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,4 +6,4 @@ from .mane_transcript import ManeTranscript
6
6
  from .exon_genomic_coords import ExonGenomicCoordsMapper
7
7
 
8
8
 
9
- __all__ = ["AlignmentMapper", "LiftOver", "ManeTranscript", "ExonGenomicCoordsMapper"]
9
+ __all__ = ["AlignmentMapper", "ExonGenomicCoordsMapper", "LiftOver", "ManeTranscript"]
@@ -93,6 +93,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
93
93
  )
94
94
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
95
95
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
96
+ strand: Strand | None = Field(
97
+ None, description="The strand that the transcript accession exists on."
98
+ )
96
99
  errors: list[StrictStr] = Field([], description="Error messages.")
97
100
 
98
101
  @model_validator(mode="before")
@@ -121,6 +124,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
121
124
  "gene": "TPM3",
122
125
  "genomic_ac": "NC_000001.11",
123
126
  "tx_ac": "NM_152263.3",
127
+ "strand": -1,
124
128
  "seg": {
125
129
  "exon_ord": 0,
126
130
  "offset": 0,
@@ -147,6 +151,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
147
151
  )
148
152
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
149
153
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
154
+ strand: Strand | None = Field(
155
+ None, description="The strand that the transcript exists on."
156
+ )
150
157
  seg_start: TxSegment | None = Field(None, description="Start transcript segment.")
151
158
  seg_end: TxSegment | None = Field(None, description="End transcript segment.")
152
159
  errors: list[StrictStr] = Field([], description="Error messages.")
@@ -183,6 +190,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
183
190
  "gene": "TPM3",
184
191
  "genomic_ac": "NC_000001.11",
185
192
  "tx_ac": "NM_152263.3",
193
+ "strand": -1,
186
194
  "seg_start": {
187
195
  "exon_ord": 0,
188
196
  "offset": 0,
@@ -400,6 +408,7 @@ class ExonGenomicCoordsMapper:
400
408
  gene=gene,
401
409
  genomic_ac=genomic_ac,
402
410
  tx_ac=transcript,
411
+ strand=strand,
403
412
  seg_start=seg_start,
404
413
  seg_end=seg_end,
405
414
  )
@@ -490,6 +499,7 @@ class ExonGenomicCoordsMapper:
490
499
  params["gene"] = start_tx_seg_data.gene
491
500
  params["genomic_ac"] = start_tx_seg_data.genomic_ac
492
501
  params["tx_ac"] = start_tx_seg_data.tx_ac
502
+ params["strand"] = start_tx_seg_data.strand
493
503
  params["seg_start"] = start_tx_seg_data.seg
494
504
  else:
495
505
  start_tx_seg_data = None
@@ -524,6 +534,7 @@ class ExonGenomicCoordsMapper:
524
534
  params["gene"] = end_tx_seg_data.gene
525
535
  params["genomic_ac"] = end_tx_seg_data.genomic_ac
526
536
  params["tx_ac"] = end_tx_seg_data.tx_ac
537
+ params["strand"] = end_tx_seg_data.strand
527
538
 
528
539
  params["seg_end"] = end_tx_seg_data.seg
529
540
 
@@ -865,16 +876,21 @@ class ExonGenomicCoordsMapper:
865
876
  if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
866
877
  genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue
867
878
 
868
- # Validate that the breakpoint between the first and last exon for the selected transcript
879
+ # Validate that the breakpoint occurs within 150 bp of the first and last exon for the selected transcript.
880
+ # A breakpoint beyond this range is likely erroneous.
869
881
  coordinate_check = await self._validate_genomic_breakpoint(
870
882
  pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
871
883
  )
872
884
  if not coordinate_check:
873
- return GenomicTxSeg(
874
- errors=[
875
- f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
876
- ]
885
+ msg = (
886
+ f"{genomic_pos} on {genomic_ac} occurs more than 150 bp outside the "
887
+ f"exon boundaries of the {transcript} transcript, indicating this may not "
888
+ f"be a chimeric transcript junction and is unlikely to represent a "
889
+ f"contiguous coding sequence. Confirm that the genomic position "
890
+ f"{genomic_pos} is being used to represent transcript junction and not "
891
+ f"DNA breakpoint."
877
892
  )
893
+ _logger.warning(msg)
878
894
 
879
895
  # Check if breakpoint occurs on an exon.
880
896
  # If not, determine the adjacent exon given the selected transcript
@@ -913,6 +929,7 @@ class ExonGenomicCoordsMapper:
913
929
  gene=gene,
914
930
  genomic_ac=genomic_ac,
915
931
  tx_ac=transcript,
932
+ strand=strand,
916
933
  seg=TxSegment(
917
934
  exon_ord=exon_num,
918
935
  offset=offset,
@@ -955,8 +972,9 @@ class ExonGenomicCoordsMapper:
955
972
  :param pos: Genomic position on ``genomic_ac``
956
973
  :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
957
974
  :param transcript: A transcript accession
958
- :return: ``True`` if the coordinate falls within the first and last exon
959
- for the transcript, ``False`` if not
975
+ :return: ``True`` if the coordinate falls within 150bp of the first and last exon
976
+ for the transcript, ``False`` if not. Breakpoints past this threshold
977
+ are likely erroneous.
960
978
  """
961
979
  query = f"""
962
980
  WITH tx_boundaries AS (
@@ -968,7 +986,7 @@ class ExonGenomicCoordsMapper:
968
986
  AND alt_ac = '{genomic_ac}'
969
987
  )
970
988
  SELECT * FROM tx_boundaries
971
- WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
989
+ WHERE {pos} between (tx_boundaries.min_start - 150) and (tx_boundaries.max_end + 150)
972
990
  """ # noqa: S608
973
991
  results = await self.uta_db.execute_query(query)
974
992
  return bool(results)
@@ -1022,11 +1040,8 @@ class ExonGenomicCoordsMapper:
1022
1040
  :return ``True`` if alt_start_i should be used, ``False`` if alt_end_i should
1023
1041
  be used
1024
1042
  """
1025
- return (
1026
- is_seg_start
1027
- and strand == Strand.POSITIVE
1028
- or not is_seg_start
1029
- and strand == Strand.NEGATIVE
1043
+ return (is_seg_start and strand == Strand.POSITIVE) or (
1044
+ not is_seg_start and strand == Strand.NEGATIVE
1030
1045
  )
1031
1046
 
1032
1047
  @staticmethod
@@ -88,11 +88,11 @@ async def check_status(
88
88
  try:
89
89
  get_data_file(r)
90
90
  except FileNotFoundError:
91
- _logger.error(
91
+ _logger.exception(
92
92
  "%s does not exist at configured location %s", name_lower, declared_path
93
93
  )
94
94
  except ValueError:
95
- _logger.error(
95
+ _logger.exception(
96
96
  "%s configured at %s is not a valid file.", name_lower, declared_path
97
97
  )
98
98
  except Exception as e:
@@ -107,8 +107,8 @@ async def check_status(
107
107
  chain_file_37_to_38=chain_file_37_to_38,
108
108
  chain_file_38_to_37=chain_file_38_to_37,
109
109
  )
110
- except (FileNotFoundError, ChainfileError) as e:
111
- _logger.error("agct converter setup failed: %s", e)
110
+ except (FileNotFoundError, ChainfileError):
111
+ _logger.exception("agct converter setup failed")
112
112
  except Exception as e:
113
113
  _logger.critical("Encountered unexpected error setting up agct: %s", e)
114
114
  else:
@@ -116,10 +116,8 @@ async def check_status(
116
116
 
117
117
  try:
118
118
  await UtaDatabase.create(db_url)
119
- except (OSError, InvalidCatalogNameError, UndefinedTableError) as e:
120
- _logger.error(
121
- "Encountered error instantiating UTA at URI %s: %s", UTA_DB_URL, e
122
- )
119
+ except (OSError, InvalidCatalogNameError, UndefinedTableError):
120
+ _logger.exception("Encountered error instantiating UTA at URI %s", UTA_DB_URL)
123
121
  except Exception as e:
124
122
  _logger.critical(
125
123
  "Encountered unexpected error instantiating UTA from URI %s: %s",
@@ -134,10 +132,10 @@ async def check_status(
134
132
  sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
135
133
  sra = SeqRepoAccess(sr)
136
134
  sra.sr["NC_000001.11"][1000:1001]
137
- except OSError as e:
138
- _logger.error("Encountered error while instantiating SeqRepo: %s", e)
135
+ except OSError:
136
+ _logger.exception("Encountered error while instantiating SeqRepo")
139
137
  except KeyError:
140
- _logger.error("SeqRepo data fetch test failed -- is it populated?")
138
+ _logger.exception("SeqRepo data fetch test failed -- is it populated?")
141
139
  except Exception as e:
142
140
  _logger.critical("Encountered unexpected error setting up SeqRepo: %s", e)
143
141
  else:
@@ -162,11 +162,11 @@ class UtaDatabase:
162
162
  database=self.args.database,
163
163
  )
164
164
  except InterfaceError as e:
165
- _logger.error(
166
- "While creating connection pool, encountered exception %s", e
165
+ _logger.exception(
166
+ "While creating connection pool, encountered exception"
167
167
  )
168
168
  msg = "Could not create connection pool"
169
- raise Exception(msg) from e
169
+ raise Exception(msg) from e # noqa: TRY002
170
170
 
171
171
  @classmethod
172
172
  async def create(
@@ -221,7 +221,7 @@ class UtaDatabase:
221
221
  WHERE table_schema = '{self.schema}'
222
222
  AND table_name = 'genomic'
223
223
  );
224
- """ # noqa: S608
224
+ """
225
225
  genomic_table_exists = await self.execute_query(check_table_exists)
226
226
  genomic_table_exists = genomic_table_exists[0].get("exists")
227
227
  if genomic_table_exists is None:
@@ -250,7 +250,7 @@ class UtaDatabase:
250
250
  LEFT JOIN {self.schema}.exon_aln ea ON
251
251
  (((te.exon_id = ea.tx_exon_id) AND
252
252
  (ae.exon_id = ea.alt_exon_id))));
253
- """ # noqa: S608
253
+ """
254
254
  await self.execute_query(create_genomic_table)
255
255
 
256
256
  indexes = [
@@ -499,7 +499,7 @@ class UtaDatabase:
499
499
  AND {start_pos} BETWEEN {pos_q}
500
500
  AND {end_pos} BETWEEN {pos_q}
501
501
  {order_by_cond}
502
- """ # noqa: S608
502
+ """
503
503
  result = await self.execute_query(query)
504
504
  if not result:
505
505
  _logger.warning("Unable to find transcript alignment for query: %s", query)
@@ -852,8 +852,8 @@ class UtaDatabase:
852
852
 
853
853
  try:
854
854
  assembly = Assembly(assembly)
855
- except ValueError as e:
856
- _logger.error(e)
855
+ except ValueError:
856
+ _logger.exception("Unable to parse %s as an Assembly", assembly)
857
857
  return None
858
858
 
859
859
  return chromosome, assembly
@@ -924,11 +924,11 @@ class UtaDatabase:
924
924
 
925
925
  try:
926
926
  get_secret_value_response = client.get_secret_value(SecretId=secret_name)
927
- except ClientError as e:
927
+ except ClientError:
928
928
  # For a list of exceptions thrown, see
929
929
  # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
930
- _logger.error(e)
931
- raise e
930
+ _logger.exception("Encountered AWS client error fetching UTA DB secret")
931
+ raise
932
932
  else:
933
933
  return get_secret_value_response["SecretString"]
934
934
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.13.0
3
+ Version: 0.14.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -53,14 +53,14 @@ Requires-Dist: hgvs
53
53
  Requires-Dist: biocommons.seqrepo
54
54
  Requires-Dist: pydantic==2.*
55
55
  Requires-Dist: ga4gh.vrs~=2.0.0a10
56
- Requires-Dist: wags-tails~=0.2.2
56
+ Requires-Dist: wags-tails~=0.3.2
57
57
  Requires-Dist: bioutils
58
58
  Provides-Extra: dev
59
- Requires-Dist: pre-commit>=3.7.1; extra == "dev"
59
+ Requires-Dist: pre-commit>=4.0.1; extra == "dev"
60
60
  Requires-Dist: ipython; extra == "dev"
61
61
  Requires-Dist: ipykernel; extra == "dev"
62
62
  Requires-Dist: psycopg2-binary; extra == "dev"
63
- Requires-Dist: ruff==0.5.0; extra == "dev"
63
+ Requires-Dist: ruff==0.8.6; extra == "dev"
64
64
  Provides-Extra: tests
65
65
  Requires-Dist: pytest; extra == "tests"
66
66
  Requires-Dist: pytest-cov; extra == "tests"
@@ -74,6 +74,7 @@ Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
74
74
  Requires-Dist: sphinxext-opengraph==0.8.2; extra == "docs"
75
75
  Requires-Dist: furo==2023.3.27; extra == "docs"
76
76
  Requires-Dist: sphinx-github-changelog==1.2.1; extra == "docs"
77
+ Dynamic: license-file
77
78
 
78
79
  <h1 align="center">
79
80
  Cool-Seq-Tool
@@ -4,21 +4,21 @@ cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
4
4
  cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
7
- cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
7
+ cool_seq_tool/mappers/__init__.py,sha256=4_YNwNyw_QrlhRNu1nly8Dezv81XjCIiNa7crVXEh38,305
8
8
  cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=ORYjBVaX1HO6ln0gRJyRKxUCjZrBDi4JfYQEYebxIAc,43824
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=uPBlI9K_IUV3xN9unnCwd6bPmqpfIWB5I-dksN_JZSw,44765
10
10
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
11
11
  cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
12
12
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
13
13
  cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
14
- cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
14
+ cool_seq_tool/resources/status.py,sha256=9LYSO2mOzVmoSQwllzq1mGChjtDA6j3I0S372N89clA,5683
15
15
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
16
16
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
17
  cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
18
18
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
- cool_seq_tool/sources/uta_database.py,sha256=V8cuog35-5BtOODu_vreDlBaP_PgBv8r7pYeTXmGsio,36111
20
- cool_seq_tool-0.13.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
- cool_seq_tool-0.13.0.dist-info/METADATA,sha256=JiQMP8y83nsS7tx2oJ7nWv34gY_2iW8TR0NsXPDrn_8,6557
22
- cool_seq_tool-0.13.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
23
- cool_seq_tool-0.13.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
- cool_seq_tool-0.13.0.dist-info/RECORD,,
19
+ cool_seq_tool/sources/uta_database.py,sha256=UHFLeiuk8H29CF1tNjE8T22-QaPs_fDUaqQO6Hu18yg,36175
20
+ cool_seq_tool-0.14.0.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.14.0.dist-info/METADATA,sha256=1BpHtcCsaCUBvpmp1Qe1TVoZFHVvUir685EsH925PJ0,6579
22
+ cool_seq_tool-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ cool_seq_tool-0.14.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.14.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5