cool-seq-tool 0.4.0.dev1__py3-none-any.whl → 0.4.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
  """Module for data"""
2
- from .data_downloads import DataDownload # noqa: F401
2
+ from .data_downloads import DataDownload
@@ -4,7 +4,6 @@ import gzip
4
4
  import logging
5
5
  import shutil
6
6
  from ftplib import FTP
7
- from os import remove
8
7
  from pathlib import Path
9
8
 
10
9
  from dateutil import parser
@@ -38,18 +37,20 @@ class DataDownload:
38
37
  files = ftp.nlst()
39
38
  mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
40
39
  if not mane_summary_file:
41
- raise Exception("Unable to download MANE summary data")
40
+ msg = "Unable to download MANE summary data"
41
+ raise Exception(msg)
42
42
  mane_summary_file = mane_summary_file[0]
43
43
  self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
44
44
  mane_data_path = self._data_dir / mane_summary_file
45
45
  if not self._mane_summary_path.exists():
46
46
  logger.info("Downloading MANE summary file from NCBI.")
47
- with open(mane_data_path, "wb") as fp:
47
+ with mane_data_path.open("wb") as fp:
48
48
  ftp.retrbinary(f"RETR {mane_summary_file}", fp.write)
49
- with gzip.open(mane_data_path, "rb") as f_in:
50
- with open(self._mane_summary_path, "wb") as f_out:
51
- shutil.copyfileobj(f_in, f_out)
52
- remove(mane_data_path)
49
+ with gzip.open(
50
+ mane_data_path, "rb"
51
+ ) as f_in, self._mane_summary_path.open("wb") as f_out:
52
+ shutil.copyfileobj(f_in, f_out)
53
+ mane_data_path.unlink()
53
54
  logger.info("MANE summary file download complete.")
54
55
  return self._mane_summary_path
55
56
 
@@ -66,18 +67,23 @@ class DataDownload:
66
67
  ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
67
68
  timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
68
69
  date = str(parser.parse(timestamp)).split()[0]
69
- version = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")
70
+ version = (
71
+ datetime.datetime.strptime(date, "%Y-%m-%d")
72
+ .astimezone(tz=datetime.timezone.utc)
73
+ .strftime("%Y%m%d")
74
+ )
70
75
  fn_versioned = f"{lrg_refseqgene_file}_{version}"
71
76
  lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
72
77
  self._lrg_refseqgene_path = self._data_dir / fn_versioned
73
78
  if not self._lrg_refseqgene_path.exists():
74
79
  logger.info("Downloading LRG RefSeq data from NCBI.")
75
80
  ftp.cwd(ftp_dir_path)
76
- with open(lrg_refseqgene_path, "wb") as fp:
81
+ with lrg_refseqgene_path.open("wb") as fp:
77
82
  ftp.retrbinary(f"RETR {lrg_refseqgene_file}", fp.write)
78
- with open(lrg_refseqgene_path, "rb") as f_in:
79
- with open(self._lrg_refseqgene_path, "wb") as f_out:
80
- shutil.copyfileobj(f_in, f_out)
81
- remove(lrg_refseqgene_path)
83
+ with lrg_refseqgene_path.open(
84
+ "rb"
85
+ ) as f_in, self._lrg_refseqgene_path.open("wb") as f_out:
86
+ shutil.copyfileobj(f_in, f_out)
87
+ lrg_refseqgene_path.unlink()
82
88
  logger.info("LRG RefSeq data download complete.")
83
89
  return self._lrg_refseqgene_path
@@ -152,8 +152,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
152
152
  acs.append(ac.split("refseq:")[-1])
153
153
  if acs:
154
154
  return acs, None
155
- else:
156
- return None, f"{chromosome} is not a valid chromosome"
155
+ return None, f"{chromosome} is not a valid chromosome"
157
156
 
158
157
  def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]:
159
158
  """Get chromosome for accession.
@@ -172,8 +171,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
172
171
  )[0]
173
172
  if aliases is None:
174
173
  return None, f"Unable to get chromosome for {ac}"
175
- else:
176
- return aliases, None
174
+ return aliases, None
177
175
 
178
176
  def get_fasta_file(self, sequence_id: str, outfile_path: Path) -> None:
179
177
  """Retrieve FASTA file containing sequence for requested sequence ID.
@@ -112,8 +112,7 @@ class ExonGenomicCoordsMapper:
112
112
  # Ensure valid inputs
113
113
  if not transcript:
114
114
  return self._return_warnings(resp, "Must provide `transcript`")
115
- else:
116
- transcript = transcript.strip()
115
+ transcript = transcript.strip()
117
116
 
118
117
  exon_start_exists, exon_end_exists = False, False
119
118
  if exon_start is not None:
@@ -130,12 +129,11 @@ class ExonGenomicCoordsMapper:
130
129
  return self._return_warnings(
131
130
  resp, "Must provide either `exon_start` or `exon_end`"
132
131
  )
133
- elif exon_start_exists and exon_end_exists:
134
- if exon_start > exon_end:
135
- return self._return_warnings(
136
- resp,
137
- f"Start exon {exon_start} is greater than end exon {exon_end}",
138
- )
132
+ if exon_start_exists and exon_end_exists and (exon_start > exon_end):
133
+ return self._return_warnings(
134
+ resp,
135
+ f"Start exon {exon_start} is greater than end exon {exon_end}",
136
+ )
139
137
 
140
138
  # Get all exons and associated start/end coordinates for transcript
141
139
  tx_exons, warning = await self.uta_db.get_tx_exons(transcript)
@@ -266,7 +264,7 @@ class ExonGenomicCoordsMapper:
266
264
  if start is None and end is None:
267
265
  return self._return_warnings(resp, "Must provide either `start` or `end`")
268
266
 
269
- params = {key: None for key in GenomicData.model_fields.keys()}
267
+ params = {key: None for key in GenomicData.model_fields}
270
268
  if gene is not None:
271
269
  gene = gene.upper().strip()
272
270
 
@@ -312,13 +310,12 @@ class ExonGenomicCoordsMapper:
312
310
 
313
311
  for field in ["transcript", "gene", "chr", "strand"]:
314
312
  if start_data:
315
- if end_data:
316
- if start_data[field] != end_data[field]:
317
- msg = (
318
- f"Start `{field}`, {start_data[field]}, does "
319
- f"not match End `{field}`, {end_data[field]}"
320
- )
321
- return self._return_warnings(resp, msg)
313
+ if end_data and (start_data[field] != end_data[field]):
314
+ msg = (
315
+ f"Start `{field}`, {start_data[field]}, does "
316
+ f"not match End `{field}`, {end_data[field]}"
317
+ )
318
+ return self._return_warnings(resp, msg)
322
319
  params[field] = start_data[field]
323
320
  else:
324
321
  params[field] = end_data[field]
@@ -440,7 +437,10 @@ class ExonGenomicCoordsMapper:
440
437
  else:
441
438
  error = "Strand does not match"
442
439
  logger.warning(
443
- f"{error}: {alt_ac_data['start'][i]} != {alt_ac_data['end'][i]}"
440
+ "%s: %s != %s",
441
+ error,
442
+ alt_ac_data["start"][i],
443
+ alt_ac_data["end"][i],
444
444
  )
445
445
  return None, error
446
446
  return tuple(alt_ac_data_values), None
@@ -482,7 +482,7 @@ class ExonGenomicCoordsMapper:
482
482
  resp, "Must provide either `gene` or `transcript`"
483
483
  )
484
484
 
485
- params = {key: None for key in TranscriptExonData.model_fields.keys()}
485
+ params = {key: None for key in TranscriptExonData.model_fields}
486
486
 
487
487
  if alt_ac:
488
488
  # Check if valid accession is given
@@ -550,7 +550,7 @@ class ExonGenomicCoordsMapper:
550
550
  len_alt_acs = len(alt_acs)
551
551
  if len_alt_acs > 1:
552
552
  return None, f"Found more than one accessions: {alt_acs}"
553
- elif len_alt_acs == 0:
553
+ if len_alt_acs == 0:
554
554
  return None, "No genomic accessions found"
555
555
  alt_ac = next(iter(alt_acs))
556
556
 
@@ -565,13 +565,12 @@ class ExonGenomicCoordsMapper:
565
565
  elif len_genes == 0:
566
566
  return None, "No genes found"
567
567
 
568
- if input_gene is not None:
569
- if output_gene != input_gene.upper():
570
- return (
571
- None,
572
- f"Input gene, {input_gene}, does not match "
573
- f"expected output gene, {output_gene}",
574
- )
568
+ if input_gene is not None and output_gene != input_gene.upper():
569
+ return (
570
+ None,
571
+ f"Input gene, {input_gene}, does not match "
572
+ f"expected output gene, {output_gene}",
573
+ )
575
574
 
576
575
  gene = output_gene if output_gene else input_gene
577
576
  return (gene, alt_ac), None
@@ -13,7 +13,7 @@ constraints and data models for coordinate representation.
13
13
  """
14
14
  import logging
15
15
  import math
16
- from enum import StrEnum
16
+ from enum import Enum
17
17
  from typing import Dict, List, Optional, Set, Tuple, Union
18
18
 
19
19
  import polars as pl
@@ -37,7 +37,7 @@ from cool_seq_tool.utils import get_inter_residue_pos
37
37
  logger = logging.getLogger(__name__)
38
38
 
39
39
 
40
- class EndAnnotationLayer(StrEnum):
40
+ class EndAnnotationLayer(str, Enum):
41
41
  """Define constraints for end annotation layer. This is used for determining the
42
42
  end annotation layer when getting the longest compatible remaining representation
43
43
  """
@@ -143,10 +143,7 @@ class ManeTranscript:
143
143
  :return: cDNA position start, cDNA position end
144
144
  """
145
145
  start_pos = start * 3
146
- if end != start:
147
- end_pos = end * 3
148
- else:
149
- end_pos = start_pos
146
+ end_pos = end * 3 if end != start else start_pos
150
147
  return start_pos, end_pos - 1
151
148
 
152
149
  async def _p_to_c(
@@ -170,10 +167,10 @@ class ManeTranscript:
170
167
  elif ac.startswith("ENSP"):
171
168
  ac = self.transcript_mappings.ensp_to_enst[ac]
172
169
  else:
173
- logger.warning(f"Unable to find accession: {ac}")
170
+ logger.warning("Unable to find accession: %s", ac)
174
171
  return None
175
172
  except KeyError:
176
- logger.warning(f"{ac} not found in transcript_mappings")
173
+ logger.warning("%s not found in transcript_mappings", ac)
177
174
  return None
178
175
 
179
176
  pos = self._p_to_c_pos(start_pos, end_pos)
@@ -190,14 +187,16 @@ class ManeTranscript:
190
187
  # UTA does not store ENST versions
191
188
  # So we want to make sure version is valid
192
189
  if ac.startswith("ENST"):
193
- if not self.transcript_mappings.ensembl_transcript_version_to_gene_symbol.get(
194
- ac
195
- ):
196
- if not self.seqrepo_access.get_reference_sequence(ac, start=1, end=1)[
190
+ if (
191
+ not self.transcript_mappings.ensembl_transcript_version_to_gene_symbol.get(
192
+ ac
193
+ )
194
+ and not self.seqrepo_access.get_reference_sequence(ac, start=1, end=1)[
197
195
  0
198
- ]:
199
- logger.warning(f"Ensembl transcript not found: {ac}")
200
- return None
196
+ ]
197
+ ):
198
+ logger.warning("Ensembl transcript not found: %s", ac)
199
+ return None
201
200
 
202
201
  temp_ac = ac.split(".")[0]
203
202
  else:
@@ -206,15 +205,14 @@ class ManeTranscript:
206
205
  # c. coordinate does not contain cds start, so we need to add it
207
206
  cds_start_end = await self.uta_db.get_cds_start_end(temp_ac)
208
207
  if not cds_start_end:
209
- logger.warning(f"Accession {temp_ac} not found in UTA")
208
+ logger.warning("Accession %s not found in UTA", temp_ac)
210
209
  return None
211
210
  coding_start_site = cds_start_end[0]
212
211
  pos = pos[0] + coding_start_site, pos[1] + coding_start_site
213
212
 
214
- genomic_tx_data = await self._get_and_validate_genomic_tx_data(
213
+ return await self._get_and_validate_genomic_tx_data(
215
214
  ac, pos, AnnotationLayer.CDNA, coding_start_site=coding_start_site
216
215
  )
217
- return genomic_tx_data
218
216
 
219
217
  async def _get_and_validate_genomic_tx_data(
220
218
  self,
@@ -240,8 +238,10 @@ class ManeTranscript:
240
238
  )
241
239
  if not genomic_tx_data:
242
240
  logger.warning(
243
- f"Unable to find genomic_tx_data for {alt_ac} at position"
244
- f" {pos} on annotation layer {annotation_layer}"
241
+ "Unable to find genomic_tx_data for %s at position %s on annotation layer %s",
242
+ alt_ac,
243
+ pos,
244
+ annotation_layer,
245
245
  )
246
246
  return None
247
247
  genomic_tx_data["coding_start_site"] = coding_start_site
@@ -256,9 +256,9 @@ class ManeTranscript:
256
256
  # Validation check: Exon structure
257
257
  if og_alt_exon_id != liftover_alt_exon_id:
258
258
  logger.warning(
259
- f"Original alt_exon_id {og_alt_exon_id} "
260
- f"does not match liftover alt_exon_id "
261
- f"{liftover_alt_exon_id}"
259
+ "Original alt_exon_id %s does not match liftover alt_exon_id %s",
260
+ og_alt_exon_id,
261
+ liftover_alt_exon_id,
262
262
  )
263
263
  return None
264
264
 
@@ -294,7 +294,9 @@ class ManeTranscript:
294
294
 
295
295
  if lt_cds_start or gt_cds_end:
296
296
  logger.info(
297
- f"{refseq_c_ac} with position {c_pos_change} is not within CDS start/end"
297
+ "%s with position %s is not within CDS start/end",
298
+ refseq_c_ac,
299
+ c_pos_change,
298
300
  )
299
301
 
300
302
  return CdnaRepresentation(
@@ -380,13 +382,12 @@ class ManeTranscript:
380
382
 
381
383
  if not result:
382
384
  logger.warning(
383
- f"Unable to find transcript, {refseq_c_ac}, " f"position change"
385
+ "Unable to find transcript, %s, position change", refseq_c_ac
384
386
  )
385
387
  return None
386
- else:
387
- result = result[-1]
388
- tx_g_pos = result[5], result[6] # alt_start_i, alt_end_i
389
- tx_pos_range = result[2], result[3] # tx_start_i, tx_end_i
388
+ result = result[-1]
389
+ tx_g_pos = result[5], result[6] # alt_start_i, alt_end_i
390
+ tx_pos_range = result[2], result[3] # tx_start_i, tx_end_i
390
391
 
391
392
  cds_start_end = await self.uta_db.get_cds_start_end(refseq_c_ac)
392
393
  if not cds_start_end:
@@ -438,14 +439,17 @@ class ManeTranscript:
438
439
 
439
440
  if og_rf != new_rf:
440
441
  logger.warning(
441
- f"{ac} original reading frame ({og_rf}) does not match new "
442
- f"{transcript_data.ensembl}, {transcript_data.refseq} reading "
443
- f"frame ({new_rf})"
442
+ "%s original reading frame (%s) does not match new %s, %s reading frame (%s)",
443
+ ac,
444
+ og_rf,
445
+ transcript_data.ensembl,
446
+ transcript_data.refseq,
447
+ new_rf,
444
448
  )
445
449
  return False
446
450
  else:
447
451
  if pos_index == 0:
448
- logger.warning(f"{ac} must having start position")
452
+ logger.warning("%s must having start position", ac)
449
453
  return False
450
454
  return True
451
455
 
@@ -503,13 +507,15 @@ class ManeTranscript:
503
507
 
504
508
  if expected_ref != mane_ref:
505
509
  logger.info(
506
- f"Expected ref, {expected_ref}, but got {mane_ref}"
507
- f" on MANE accession, {mane_transcript.refseq}"
510
+ "Expected ref, %s, but got %s on MANE accession, %s",
511
+ expected_ref,
512
+ mane_ref,
513
+ mane_transcript.refseq,
508
514
  )
509
515
 
510
516
  if expected_ref != ref:
511
517
  logger.warning(
512
- f"Expected ref, {expected_ref}, but got {ref} on accession, {ac}"
518
+ "Expected ref, %s, but got %s on accession, %s", expected_ref, ref, ac
513
519
  )
514
520
  return False
515
521
 
@@ -531,8 +537,7 @@ class ManeTranscript:
531
537
  ac, start=start_pos, end=end_pos, residue_mode=ResidueMode.INTER_RESIDUE
532
538
  )[0]:
533
539
  return True
534
- else:
535
- return False
540
+ return False
536
541
 
537
542
  def _get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> List:
538
543
  """Sort and filter transcripts from gene to get priority list
@@ -687,7 +692,7 @@ class ManeTranscript:
687
692
  )
688
693
 
689
694
  if df.is_empty():
690
- logger.warning(f"Unable to get transcripts from gene {gene}")
695
+ logger.warning("Unable to get transcripts from gene %s", gene)
691
696
  return lcr_result
692
697
 
693
698
  prioritized_tx_acs = self._get_prioritized_transcripts_from_gene(df)
@@ -810,38 +815,42 @@ class ManeTranscript:
810
815
 
811
816
  if not self._validate_index(ac, pos, coding_start_site):
812
817
  logger.warning(
813
- f"{pos} are not valid positions on {ac} with coding start site "
814
- f"{coding_start_site}"
818
+ "%s are not valid positions on %s with coding start site %s",
819
+ pos,
820
+ ac,
821
+ coding_start_site,
815
822
  )
816
823
  continue
817
824
  return lcr_result
818
- else:
819
- lcr_result = ProteinAndCdnaRepresentation(
820
- protein=_get_protein_rep(
821
- gene,
822
- row["pro_ac"],
823
- lcr_c_data.pos,
824
- g["strand"],
825
- lcr_c_data.status,
826
- ),
827
- cdna=lcr_c_data,
828
- )
829
- lcr_result_dict = lcr_result.model_dump()
830
-
831
- valid = True
832
- for k in lcr_result_dict.keys():
833
- cds = lcr_result_dict[k].get("coding_start_site", 0)
834
- ac = lcr_result_dict[k]["refseq"] or lcr_result_dict[k]["ensembl"]
835
- pos = lcr_result_dict[k]["pos"]
836
- if not self._validate_index(ac, pos, cds):
837
- valid = False
838
- logger.warning(
839
- f"{pos} are not valid positions on {ac} with coding start site {cds}"
840
- )
841
- break
842
-
843
- if valid:
844
- return lcr_result
825
+ lcr_result = ProteinAndCdnaRepresentation(
826
+ protein=_get_protein_rep(
827
+ gene,
828
+ row["pro_ac"],
829
+ lcr_c_data.pos,
830
+ g["strand"],
831
+ lcr_c_data.status,
832
+ ),
833
+ cdna=lcr_c_data,
834
+ )
835
+ lcr_result_dict = lcr_result.model_dump()
836
+
837
+ valid = True
838
+ for k in lcr_result_dict:
839
+ cds = lcr_result_dict[k].get("coding_start_site", 0)
840
+ ac = lcr_result_dict[k]["refseq"] or lcr_result_dict[k]["ensembl"]
841
+ pos = lcr_result_dict[k]["pos"]
842
+ if not self._validate_index(ac, pos, cds):
843
+ valid = False
844
+ logger.warning(
845
+ "%s are not valid positions on %s with coding start site %s",
846
+ pos,
847
+ ac,
848
+ cds,
849
+ )
850
+ break
851
+
852
+ if valid:
853
+ return lcr_result
845
854
  return lcr_result
846
855
 
847
856
  async def get_mane_transcript(
@@ -917,9 +926,10 @@ class ManeTranscript:
917
926
  # those transcripts meeting criterion
918
927
  mane_transcripts = set()
919
928
  for current_mane_data in mane_data:
920
- mane_transcripts |= set(
921
- (current_mane_data["RefSeq_nuc"], current_mane_data["Ensembl_nuc"])
922
- )
929
+ mane_transcripts |= {
930
+ current_mane_data["RefSeq_nuc"],
931
+ current_mane_data["Ensembl_nuc"],
932
+ }
923
933
  mane: Optional[CdnaRepresentation] = await self._g_to_c(
924
934
  g=g,
925
935
  refseq_c_ac=current_mane_data["RefSeq_nuc"],
@@ -974,24 +984,22 @@ class ManeTranscript:
974
984
  residue_mode=residue_mode,
975
985
  mane_transcripts=mane_transcripts,
976
986
  )
977
- else:
978
- return await self.get_longest_compatible_transcript(
979
- c_pos[0],
980
- c_pos[1],
981
- AnnotationLayer.CDNA,
982
- ref=ref,
983
- gene=g["gene"],
984
- residue_mode=residue_mode,
985
- mane_transcripts=mane_transcripts,
986
- )
987
- else:
988
- return None
989
- elif start_annotation_layer == AnnotationLayer.GENOMIC:
987
+ return await self.get_longest_compatible_transcript(
988
+ c_pos[0],
989
+ c_pos[1],
990
+ AnnotationLayer.CDNA,
991
+ ref=ref,
992
+ gene=g["gene"],
993
+ residue_mode=residue_mode,
994
+ mane_transcripts=mane_transcripts,
995
+ )
996
+ return None
997
+ if start_annotation_layer == AnnotationLayer.GENOMIC:
990
998
  return await self.g_to_mane_c(
991
999
  ac, start_pos, end_pos, gene=gene, residue_mode=residue_mode
992
1000
  )
993
- else:
994
- logger.warning(f"Annotation layer not supported: {start_annotation_layer}")
1001
+ logger.warning("Annotation layer not supported: %s", start_annotation_layer)
1002
+ return None
995
1003
 
996
1004
  async def g_to_grch38(
997
1005
  self, ac: str, start_pos: int, end_pos: int
@@ -1011,9 +1019,8 @@ class ManeTranscript:
1011
1019
  if not descr:
1012
1020
  # Already GRCh38 assembly
1013
1021
  if self._validate_index(ac, (start_pos, end_pos), 0):
1014
- return dict(ac=ac, pos=(start_pos, end_pos))
1015
- else:
1016
- return None
1022
+ return {"ac": ac, "pos": (start_pos, end_pos)}
1023
+ return None
1017
1024
  chromosome, assembly = descr
1018
1025
  is_same_pos = start_pos == end_pos
1019
1026
 
@@ -1027,8 +1034,7 @@ class ManeTranscript:
1027
1034
  )
1028
1035
  if liftover_start_i is None:
1029
1036
  return None
1030
- else:
1031
- start_pos = liftover_start_i[1]
1037
+ start_pos = liftover_start_i[1]
1032
1038
 
1033
1039
  if not is_same_pos:
1034
1040
  liftover_end_i = self.uta_db.get_liftover(
@@ -1036,8 +1042,7 @@ class ManeTranscript:
1036
1042
  )
1037
1043
  if liftover_end_i is None:
1038
1044
  return None
1039
- else:
1040
- end_pos = liftover_end_i[1]
1045
+ end_pos = liftover_end_i[1]
1041
1046
  else:
1042
1047
  end_pos = start_pos
1043
1048
 
@@ -1045,8 +1050,7 @@ class ManeTranscript:
1045
1050
  if newest_ac:
1046
1051
  ac = newest_ac[0]
1047
1052
  if self._validate_index(ac, (start_pos, end_pos), 0):
1048
- return dict(ac=ac, pos=(start_pos, end_pos))
1049
-
1053
+ return {"ac": ac, "pos": (start_pos, end_pos)}
1050
1054
  return None
1051
1055
 
1052
1056
  @staticmethod
@@ -1128,7 +1132,7 @@ class ManeTranscript:
1128
1132
  )
1129
1133
 
1130
1134
  if not await self.uta_db.validate_genomic_ac(ac):
1131
- logger.warning(f"Genomic accession does not exist: {ac}")
1135
+ logger.warning("Genomic accession does not exist: %s", ac)
1132
1136
  return None
1133
1137
 
1134
1138
  mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene)
@@ -1154,8 +1158,7 @@ class ManeTranscript:
1154
1158
  )
1155
1159
  if not mane_tx_genomic_data:
1156
1160
  continue
1157
- else:
1158
- logger.info("Not using most recent assembly")
1161
+ logger.info("Not using most recent assembly")
1159
1162
 
1160
1163
  coding_start_site = mane_tx_genomic_data["coding_start_site"]
1161
1164
  coding_end_site = mane_tx_genomic_data["coding_end_site"]
@@ -1167,9 +1170,10 @@ class ManeTranscript:
1167
1170
  mane_c_ac, mane_c_pos_change, coding_start_site
1168
1171
  ):
1169
1172
  logger.warning(
1170
- f"{mane_c_pos_change} are not valid positions"
1171
- f" on {mane_c_ac}with coding start site "
1172
- f"{coding_start_site}"
1173
+ "%s are not valid positions on %s with coding start site %s",
1174
+ mane_c_pos_change,
1175
+ mane_c_ac,
1176
+ coding_start_site,
1173
1177
  )
1174
1178
  continue
1175
1179
 
@@ -1187,6 +1191,7 @@ class ManeTranscript:
1187
1191
  ensembl_c_ac=current_mane_data["Ensembl_nuc"],
1188
1192
  alt_ac=grch38["ac"] if grch38 else None,
1189
1193
  )
1194
+ return None
1190
1195
 
1191
1196
  async def grch38_to_mane_c_p(
1192
1197
  self,
@@ -1234,7 +1239,7 @@ class ManeTranscript:
1234
1239
  mane_transcripts = set() # Used if getting longest compatible remaining
1235
1240
  for current_mane_data in mane_data:
1236
1241
  mane_c_ac = current_mane_data["RefSeq_nuc"]
1237
- mane_transcripts |= set((mane_c_ac, current_mane_data["Ensembl_nuc"]))
1242
+ mane_transcripts |= {mane_c_ac, current_mane_data["Ensembl_nuc"]}
1238
1243
 
1239
1244
  # GRCh38 -> MANE C
1240
1245
  mane_tx_genomic_data = await self.uta_db.get_mane_c_genomic_data(
@@ -1255,8 +1260,10 @@ class ManeTranscript:
1255
1260
  mane_c_ac, mane_c_pos_change, coding_start_site
1256
1261
  ):
1257
1262
  logger.warning(
1258
- f"{mane_c_pos_change} are not valid positions on {mane_c_ac} with "
1259
- f"coding start site {coding_start_site}"
1263
+ "%s are not valid positions on %s with coding start site %s",
1264
+ mane_c_pos_change,
1265
+ mane_c_ac,
1266
+ coding_start_site,
1260
1267
  )
1261
1268
  continue
1262
1269
 
@@ -1286,5 +1293,4 @@ class ManeTranscript:
1286
1293
  end_annotation_layer=EndAnnotationLayer.PROTEIN_AND_CDNA,
1287
1294
  mane_transcripts=mane_transcripts,
1288
1295
  )
1289
- else:
1290
- return None
1296
+ return None