cool-seq-tool 0.4.0.dev1__py3-none-any.whl → 0.4.0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/data/__init__.py +1 -1
- cool_seq_tool/data/data_downloads.py +19 -13
- cool_seq_tool/handlers/seqrepo_access.py +2 -4
- cool_seq_tool/mappers/exon_genomic_coords.py +25 -26
- cool_seq_tool/mappers/mane_transcript.py +112 -106
- cool_seq_tool/routers/default.py +7 -9
- cool_seq_tool/routers/mane.py +2 -2
- cool_seq_tool/schemas.py +30 -21
- cool_seq_tool/sources/mane_transcript_mappings.py +1 -1
- cool_seq_tool/sources/transcript_mappings.py +13 -16
- cool_seq_tool/sources/uta_database.py +134 -153
- cool_seq_tool/utils.py +5 -2
- cool_seq_tool/version.py +1 -1
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/METADATA +7 -6
- cool_seq_tool-0.4.0.dev2.dist-info/RECORD +29 -0
- cool_seq_tool-0.4.0.dev1.dist-info/RECORD +0 -29
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/top_level.txt +0 -0
@@ -10,16 +10,16 @@ from urllib.parse import quote, unquote, urlparse
|
|
10
10
|
import asyncpg
|
11
11
|
import boto3
|
12
12
|
import polars as pl
|
13
|
+
from agct import Converter, Genome
|
13
14
|
from asyncpg.exceptions import InterfaceError, InvalidAuthorizationSpecificationError
|
14
15
|
from botocore.exceptions import ClientError
|
15
|
-
from pyliftover import LiftOver
|
16
16
|
|
17
17
|
from cool_seq_tool.schemas import AnnotationLayer, Assembly, Strand
|
18
18
|
|
19
19
|
# use `bound` to upper-bound UtaDatabase or child classes
|
20
20
|
UTADatabaseType = TypeVar("UTADatabaseType", bound="UtaDatabase")
|
21
21
|
|
22
|
-
# Environment variables for paths to chain files for
|
22
|
+
# Environment variables for paths to chain files for agct
|
23
23
|
LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
|
24
24
|
LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
|
25
25
|
|
@@ -55,13 +55,13 @@ class UtaDatabase:
|
|
55
55
|
:param db_url: PostgreSQL connection URL
|
56
56
|
Format: ``driver://user:password@host/database/schema``
|
57
57
|
:param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
|
58
|
-
This is used for ``
|
58
|
+
This is used for ``agct``. If this is not provided, will check to see
|
59
59
|
if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
|
60
|
-
allow ``
|
60
|
+
allow ``agct`` to download a chain file from UCSC
|
61
61
|
:param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
|
62
|
-
This is used for ``
|
62
|
+
This is used for ``agct``. If this is not provided, will check to see
|
63
63
|
if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
|
64
|
-
allow ``
|
64
|
+
allow ``agct`` to download a chain file from UCSC
|
65
65
|
"""
|
66
66
|
self.schema = None
|
67
67
|
self._connection_pool = None
|
@@ -71,15 +71,15 @@ class UtaDatabase:
|
|
71
71
|
|
72
72
|
chain_file_37_to_38 = chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38
|
73
73
|
if chain_file_37_to_38:
|
74
|
-
self.liftover_37_to_38 =
|
74
|
+
self.liftover_37_to_38 = Converter(chainfile=chain_file_37_to_38)
|
75
75
|
else:
|
76
|
-
self.liftover_37_to_38 =
|
76
|
+
self.liftover_37_to_38 = Converter(from_db=Genome.HG19, to_db=Genome.HG38)
|
77
77
|
|
78
78
|
chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37
|
79
79
|
if chain_file_38_to_37:
|
80
|
-
self.liftover_38_to_37 =
|
80
|
+
self.liftover_38_to_37 = Converter(chainfile=chain_file_38_to_37)
|
81
81
|
else:
|
82
|
-
self.liftover_38_to_37 =
|
82
|
+
self.liftover_38_to_37 = Converter(from_db=Genome.HG38, to_db=Genome.HG19)
|
83
83
|
|
84
84
|
def _get_conn_args(self) -> Dict:
|
85
85
|
"""Return connection arguments.
|
@@ -102,24 +102,23 @@ class UtaDatabase:
|
|
102
102
|
environ[
|
103
103
|
"UTA_DB_URL"
|
104
104
|
] = f"postgresql://{username}@{host}:{port}/{database}/{schema}"
|
105
|
-
return
|
106
|
-
host
|
107
|
-
port
|
108
|
-
database
|
109
|
-
user
|
110
|
-
password
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
)
|
105
|
+
return {
|
106
|
+
"host": host,
|
107
|
+
"port": int(port),
|
108
|
+
"database": database,
|
109
|
+
"user": username,
|
110
|
+
"password": password,
|
111
|
+
}
|
112
|
+
url = ParseResult(urlparse(self.db_url))
|
113
|
+
self.schema = url.schema
|
114
|
+
password = unquote(url.password) if url.password else ""
|
115
|
+
return {
|
116
|
+
"host": url.hostname,
|
117
|
+
"port": url.port,
|
118
|
+
"database": url.database,
|
119
|
+
"user": url.username,
|
120
|
+
"password": password,
|
121
|
+
}
|
123
122
|
|
124
123
|
async def create_pool(self) -> None:
|
125
124
|
"""Create connection pool if not already created."""
|
@@ -139,9 +138,10 @@ class UtaDatabase:
|
|
139
138
|
)
|
140
139
|
except InterfaceError as e:
|
141
140
|
logger.error(
|
142
|
-
|
141
|
+
"While creating connection pool, encountered exception %s", e
|
143
142
|
)
|
144
|
-
|
143
|
+
msg = "Could not create connection pool"
|
144
|
+
raise Exception(msg) from e
|
145
145
|
|
146
146
|
@classmethod
|
147
147
|
async def create(
|
@@ -173,21 +173,17 @@ class UtaDatabase:
|
|
173
173
|
"""
|
174
174
|
|
175
175
|
async def _execute_query(q: str) -> Any: # noqa: ANN401
|
176
|
-
async with self._connection_pool.acquire() as connection:
|
177
|
-
|
178
|
-
r = await connection.fetch(q)
|
179
|
-
return r
|
176
|
+
async with self._connection_pool.acquire() as connection, connection.transaction():
|
177
|
+
return await connection.fetch(q)
|
180
178
|
|
181
179
|
if not self._connection_pool:
|
182
180
|
await self.create_pool()
|
183
181
|
try:
|
184
|
-
|
185
|
-
return result
|
182
|
+
return await _execute_query(query)
|
186
183
|
except InvalidAuthorizationSpecificationError:
|
187
184
|
self._connection_pool = None
|
188
185
|
await self.create_pool()
|
189
|
-
|
190
|
-
return result
|
186
|
+
return await _execute_query(query)
|
191
187
|
|
192
188
|
async def _create_genomic_table(self) -> None:
|
193
189
|
"""Create table containing genomic accession information."""
|
@@ -197,7 +193,7 @@ class UtaDatabase:
|
|
197
193
|
WHERE table_schema = '{self.schema}'
|
198
194
|
AND table_name = 'genomic'
|
199
195
|
);
|
200
|
-
"""
|
196
|
+
""" # noqa: S608
|
201
197
|
genomic_table_exists = await self.execute_query(check_table_exists)
|
202
198
|
genomic_table_exists = genomic_table_exists[0].get("exists")
|
203
199
|
if genomic_table_exists is None:
|
@@ -205,7 +201,8 @@ class UtaDatabase:
|
|
205
201
|
"SELECT EXISTS query in UtaDatabase._create_genomic_table "
|
206
202
|
"returned invalid response"
|
207
203
|
)
|
208
|
-
|
204
|
+
msg = "SELECT EXISTS query returned invalid response"
|
205
|
+
raise ValueError(msg)
|
209
206
|
if not genomic_table_exists:
|
210
207
|
create_genomic_table = f"""
|
211
208
|
CREATE TABLE {self.schema}.genomic AS
|
@@ -225,7 +222,7 @@ class UtaDatabase:
|
|
225
222
|
LEFT JOIN {self.schema}.exon_aln ea ON
|
226
223
|
(((te.exon_id = ea.tx_exon_id) AND
|
227
224
|
(ae.exon_id = ea.alt_exon_id))));
|
228
|
-
"""
|
225
|
+
""" # noqa: S608
|
229
226
|
await self.execute_query(create_genomic_table)
|
230
227
|
|
231
228
|
indexes = [
|
@@ -243,9 +240,9 @@ class UtaDatabase:
|
|
243
240
|
:param li: List of asyncpg.Record objects
|
244
241
|
:return: List of list of objects
|
245
242
|
"""
|
246
|
-
results =
|
243
|
+
results = []
|
247
244
|
for item in li:
|
248
|
-
results.append(
|
245
|
+
results.append(list(item))
|
249
246
|
return results
|
250
247
|
|
251
248
|
async def get_genes_and_alt_acs(
|
@@ -285,7 +282,7 @@ class UtaDatabase:
|
|
285
282
|
AND {pos} BETWEEN alt_start_i AND alt_end_i
|
286
283
|
{strand_cond}
|
287
284
|
{gene_cond};
|
288
|
-
"""
|
285
|
+
""" # noqa: S608
|
289
286
|
|
290
287
|
results = await self.execute_query(query)
|
291
288
|
if not results:
|
@@ -309,7 +306,7 @@ class UtaDatabase:
|
|
309
306
|
for r in results:
|
310
307
|
genes.add(r[0])
|
311
308
|
alt_acs.add(r[1])
|
312
|
-
return
|
309
|
+
return {"genes": genes, "alt_acs": alt_acs}, None
|
313
310
|
|
314
311
|
async def get_tx_exons(
|
315
312
|
self, tx_ac: str, alt_ac: Optional[str] = None
|
@@ -329,7 +326,7 @@ class UtaDatabase:
|
|
329
326
|
WHERE tx_ac = '{tx_ac}'
|
330
327
|
AND alt_aln_method = 'splign'
|
331
328
|
AND alt_ac = '{alt_ac}'
|
332
|
-
"""
|
329
|
+
""" # noqa: S608
|
333
330
|
else:
|
334
331
|
# Use GRCh38 by default if no genomic accession is provided
|
335
332
|
query = f"""
|
@@ -341,16 +338,15 @@ class UtaDatabase:
|
|
341
338
|
AND t.tx_ac = '{tx_ac}'
|
342
339
|
AND t.alt_aln_method = 'splign'
|
343
340
|
AND t.alt_ac like 'NC_000%'
|
344
|
-
"""
|
341
|
+
""" # noqa: S608
|
345
342
|
result = await self.execute_query(query)
|
346
343
|
|
347
344
|
if not result:
|
348
345
|
msg = f"Unable to get exons for {tx_ac}"
|
349
346
|
logger.warning(msg)
|
350
347
|
return None, msg
|
351
|
-
|
352
|
-
|
353
|
-
return tx_exons, None
|
348
|
+
tx_exons = [(r["tx_start_i"], r["tx_end_i"]) for r in result]
|
349
|
+
return tx_exons, None
|
354
350
|
|
355
351
|
async def get_alt_ac_start_or_end(
|
356
352
|
self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: Optional[str]
|
@@ -365,10 +361,7 @@ class UtaDatabase:
|
|
365
361
|
aligned genomic start coordinate, aligned genomic end coordinate, strand],
|
366
362
|
and warnings if found
|
367
363
|
"""
|
368
|
-
if gene
|
369
|
-
gene_query = f"AND T.hgnc = '{gene}'"
|
370
|
-
else:
|
371
|
-
gene_query = ""
|
364
|
+
gene_query = f"AND T.hgnc = '{gene}'" if gene else ""
|
372
365
|
|
373
366
|
query = f"""
|
374
367
|
SELECT T.hgnc, T.alt_ac, T.alt_start_i, T.alt_end_i, T.alt_strand
|
@@ -382,7 +375,7 @@ class UtaDatabase:
|
|
382
375
|
AND T.alt_ac LIKE 'NC_00%'
|
383
376
|
ORDER BY CAST(SUBSTR(T.alt_ac, position('.' in T.alt_ac) + 1,
|
384
377
|
LENGTH(T.alt_ac)) AS INT) DESC;
|
385
|
-
"""
|
378
|
+
""" # noqa: S608
|
386
379
|
result = await self.execute_query(query)
|
387
380
|
if not result:
|
388
381
|
msg = (
|
@@ -394,8 +387,7 @@ class UtaDatabase:
|
|
394
387
|
msg += f" on gene {gene}"
|
395
388
|
logger.warning(msg)
|
396
389
|
return None, msg
|
397
|
-
|
398
|
-
result = result[0]
|
390
|
+
result = result[0]
|
399
391
|
return (result[0], result[1], result[2], result[3], result[4]), None
|
400
392
|
|
401
393
|
async def get_cds_start_end(self, tx_ac: str) -> Optional[Tuple[int, int]]:
|
@@ -410,15 +402,15 @@ class UtaDatabase:
|
|
410
402
|
SELECT cds_start_i, cds_end_i
|
411
403
|
FROM {self.schema}.transcript
|
412
404
|
WHERE ac='{tx_ac}';
|
413
|
-
"""
|
405
|
+
""" # noqa: S608
|
414
406
|
cds_start_end = await self.execute_query(query)
|
415
407
|
if cds_start_end:
|
416
408
|
cds_start_end = cds_start_end[0]
|
417
|
-
if cds_start_end[0] is not None and cds_start_end[1] is not None:
|
409
|
+
if cds_start_end[0] is not None and cds_start_end[1] is not None: # noqa: RET503
|
418
410
|
return cds_start_end[0], cds_start_end[1]
|
419
411
|
else:
|
420
412
|
logger.warning(
|
421
|
-
|
413
|
+
"Unable to get coding start/end site for accession: %s", tx_ac
|
422
414
|
)
|
423
415
|
return None
|
424
416
|
|
@@ -444,7 +436,7 @@ class UtaDatabase:
|
|
444
436
|
WHERE ac LIKE '{ac.split('.')[0]}%'
|
445
437
|
AND ((descr IS NULL) OR (descr = ''))
|
446
438
|
{order_by_cond}
|
447
|
-
"""
|
439
|
+
""" # noqa: S608
|
448
440
|
results = await self.execute_query(query)
|
449
441
|
if not results:
|
450
442
|
return []
|
@@ -463,7 +455,7 @@ class UtaDatabase:
|
|
463
455
|
FROM {self.schema}._seq_anno_most_recent
|
464
456
|
WHERE ac = '{ac}'
|
465
457
|
);
|
466
|
-
"""
|
458
|
+
""" # noqa: S608
|
467
459
|
result = await self.execute_query(query)
|
468
460
|
return result[0][0]
|
469
461
|
|
@@ -487,16 +479,15 @@ class UtaDatabase:
|
|
487
479
|
SELECT descr
|
488
480
|
FROM {self.schema}._seq_anno_most_recent
|
489
481
|
WHERE ac = '{ac}';
|
490
|
-
"""
|
482
|
+
""" # noqa: S608
|
491
483
|
result = await self.execute_query(query)
|
492
484
|
if not result:
|
493
|
-
logger.warning(
|
485
|
+
logger.warning("Accession %s does not have a description", ac)
|
494
486
|
return None
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
return result
|
487
|
+
result = result[0][0]
|
488
|
+
if result == "":
|
489
|
+
result = None
|
490
|
+
return result
|
500
491
|
|
501
492
|
async def get_tx_exon_aln_v_data(
|
502
493
|
self,
|
@@ -533,11 +524,11 @@ class UtaDatabase:
|
|
533
524
|
aln_method = f"AND alt_aln_method='splign'" # noqa: F541
|
534
525
|
|
535
526
|
if like_tx_ac:
|
536
|
-
tx_q = f"WHERE tx_ac LIKE '{temp_ac}%'"
|
527
|
+
tx_q = f"WHERE tx_ac LIKE '{temp_ac}%'"
|
537
528
|
else:
|
538
|
-
tx_q = f"WHERE tx_ac='{temp_ac}'"
|
529
|
+
tx_q = f"WHERE tx_ac='{temp_ac}'"
|
539
530
|
|
540
|
-
order_by_cond = "ORDER BY CAST(SUBSTR(alt_ac, position('.' in alt_ac) + 1, LENGTH(alt_ac)) AS INT)"
|
531
|
+
order_by_cond = "ORDER BY CAST(SUBSTR(alt_ac, position('.' in alt_ac) + 1, LENGTH(alt_ac)) AS INT)"
|
541
532
|
if alt_ac:
|
542
533
|
alt_ac_q = f"AND alt_ac = '{alt_ac}'"
|
543
534
|
if alt_ac.startswith("EN"):
|
@@ -560,22 +551,20 @@ class UtaDatabase:
|
|
560
551
|
AND {start_pos} BETWEEN {pos_q}
|
561
552
|
AND {end_pos} BETWEEN {pos_q}
|
562
553
|
{order_by_cond}
|
563
|
-
"""
|
554
|
+
""" # noqa: S608
|
564
555
|
result = await self.execute_query(query)
|
565
556
|
if not result:
|
566
|
-
logger.warning(
|
567
|
-
f"Unable to find transcript alignment for query: " f"{query}"
|
568
|
-
)
|
557
|
+
logger.warning("Unable to find transcript alignment for query: %s", query)
|
569
558
|
return []
|
570
|
-
if alt_ac and not use_tx_pos:
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
results =
|
559
|
+
if alt_ac and not use_tx_pos and len(result) > 1:
|
560
|
+
logger.debug(
|
561
|
+
"Found more than one match for tx_ac %s and alt_ac = %s",
|
562
|
+
temp_ac,
|
563
|
+
alt_ac,
|
564
|
+
)
|
565
|
+
results = []
|
577
566
|
for r in result:
|
578
|
-
results.append(
|
567
|
+
results.append(list(r))
|
579
568
|
return results
|
580
569
|
|
581
570
|
@staticmethod
|
@@ -595,21 +584,21 @@ class UtaDatabase:
|
|
595
584
|
|
596
585
|
if (tx_pos_range[1] - tx_pos_range[0]) != (alt_pos_range[1] - alt_pos_range[0]):
|
597
586
|
logger.warning(
|
598
|
-
|
599
|
-
|
600
|
-
|
587
|
+
"tx_pos_range %s is not the same length as alt_pos_range %s.",
|
588
|
+
tx_pos_range,
|
589
|
+
alt_pos_range,
|
601
590
|
)
|
602
591
|
return None
|
603
592
|
|
604
|
-
return
|
605
|
-
gene
|
606
|
-
strand
|
607
|
-
tx_pos_range
|
608
|
-
alt_pos_range
|
609
|
-
alt_aln_method
|
610
|
-
tx_exon_id
|
611
|
-
alt_exon_id
|
612
|
-
|
593
|
+
return {
|
594
|
+
"gene": gene,
|
595
|
+
"strand": strand,
|
596
|
+
"tx_pos_range": tx_pos_range,
|
597
|
+
"alt_pos_range": alt_pos_range,
|
598
|
+
"alt_aln_method": alt_aln_method,
|
599
|
+
"tx_exon_id": tx_exon_id,
|
600
|
+
"alt_exon_id": alt_exon_id,
|
601
|
+
}
|
613
602
|
|
614
603
|
async def get_mane_c_genomic_data(
|
615
604
|
self, ac: str, alt_ac: Optional[str], start_pos: int, end_pos: int
|
@@ -652,7 +641,7 @@ class UtaDatabase:
|
|
652
641
|
|
653
642
|
coding_start_site = await self.get_cds_start_end(ac)
|
654
643
|
if coding_start_site is None:
|
655
|
-
logger.warning(
|
644
|
+
logger.warning("Accession %s not found in UTA", ac)
|
656
645
|
return None
|
657
646
|
|
658
647
|
data["tx_ac"] = result[1]
|
@@ -753,7 +742,7 @@ class UtaDatabase:
|
|
753
742
|
WHERE hgnc = '{gene}'
|
754
743
|
AND alt_ac LIKE 'NC_00%'
|
755
744
|
ORDER BY alt_ac;
|
756
|
-
"""
|
745
|
+
""" # noqa: S608
|
757
746
|
|
758
747
|
records = await self.execute_query(query)
|
759
748
|
if not records:
|
@@ -790,19 +779,20 @@ class UtaDatabase:
|
|
790
779
|
WHERE alt_ac = '{ac}'
|
791
780
|
AND {start_pos} BETWEEN alt_start_i AND alt_end_i
|
792
781
|
AND {end_pos} BETWEEN alt_start_i AND alt_end_i;
|
793
|
-
"""
|
782
|
+
""" # noqa: S608
|
794
783
|
results = await self.execute_query(query)
|
795
784
|
if not results:
|
796
785
|
logger.warning(
|
797
|
-
|
786
|
+
"Unable to find gene between %s and %s on %s", start_pos, end_pos, ac
|
798
787
|
)
|
799
788
|
return None
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
789
|
+
if len(results) > 1:
|
790
|
+
logger.info(
|
791
|
+
"Found more than one gene between %s and %s on %s",
|
792
|
+
start_pos,
|
793
|
+
end_pos,
|
794
|
+
ac,
|
795
|
+
)
|
806
796
|
|
807
797
|
return [r[0] for r in results]
|
808
798
|
|
@@ -876,7 +866,7 @@ class UtaDatabase:
|
|
876
866
|
{alt_ac_cond}
|
877
867
|
{pos_cond}
|
878
868
|
{order_by_cond}
|
879
|
-
"""
|
869
|
+
""" # noqa: S608
|
880
870
|
results = await self.execute_query(query)
|
881
871
|
results = [
|
882
872
|
(r["pro_ac"], r["tx_ac"], r["alt_ac"], r["cds_start_i"]) for r in results
|
@@ -902,8 +892,8 @@ class UtaDatabase:
|
|
902
892
|
|
903
893
|
if assembly not in ["GRCh37", "GRCh38"]:
|
904
894
|
logger.warning(
|
905
|
-
|
906
|
-
|
895
|
+
"Assembly not supported: %s. Only GRCh37 and GRCh38 are supported.",
|
896
|
+
assembly,
|
907
897
|
)
|
908
898
|
return None
|
909
899
|
|
@@ -918,22 +908,22 @@ class UtaDatabase:
|
|
918
908
|
descr = await self.get_chr_assembly(genomic_tx_data["alt_ac"])
|
919
909
|
if descr is None:
|
920
910
|
# already grch38
|
921
|
-
return
|
911
|
+
return
|
922
912
|
chromosome, _ = descr
|
923
913
|
|
924
914
|
query = f"""
|
925
915
|
SELECT DISTINCT alt_ac
|
926
916
|
FROM {self.schema}.tx_exon_aln_v
|
927
917
|
WHERE tx_ac = '{genomic_tx_data['tx_ac']}';
|
928
|
-
"""
|
918
|
+
""" # noqa: S608
|
929
919
|
nc_acs = await self.execute_query(query)
|
930
920
|
nc_acs = [nc_ac[0] for nc_ac in nc_acs]
|
931
921
|
if nc_acs == [genomic_tx_data["alt_ac"]]:
|
932
922
|
logger.warning(
|
933
|
-
|
934
|
-
|
923
|
+
"UTA does not have GRCh38 assembly for %s",
|
924
|
+
genomic_tx_data["alt_ac"].split(".")[0],
|
935
925
|
)
|
936
|
-
return
|
926
|
+
return
|
937
927
|
|
938
928
|
# Get most recent assembly version position
|
939
929
|
# Liftover range
|
@@ -959,20 +949,19 @@ class UtaDatabase:
|
|
959
949
|
FROM {self.schema}.genomic
|
960
950
|
WHERE alt_ac LIKE '{genomic_tx_data['alt_ac'].split('.')[0]}%'
|
961
951
|
{order_by_cond}
|
962
|
-
"""
|
952
|
+
""" # noqa: S608
|
963
953
|
nc_acs = await self.execute_query(query)
|
964
954
|
genomic_tx_data["alt_ac"] = nc_acs[0][0]
|
965
955
|
|
966
956
|
def get_liftover(
|
967
957
|
self, chromosome: str, pos: int, liftover_to_assembly: Assembly
|
968
|
-
) -> Optional[Tuple]:
|
958
|
+
) -> Optional[Tuple[str, int]]:
|
969
959
|
"""Get new genome assembly data for a position on a chromosome.
|
970
960
|
|
971
961
|
:param chromosome: The chromosome number. Must be prefixed with ``chr``
|
972
962
|
:param pos: Position on the chromosome
|
973
963
|
:param liftover_to_assembly: Assembly to liftover to
|
974
|
-
:return:
|
975
|
-
conversion_chain_score] for assembly
|
964
|
+
:return: Target chromosome and target position for assembly
|
976
965
|
"""
|
977
966
|
if not chromosome.startswith("chr"):
|
978
967
|
logger.warning("`chromosome` must be prefixed with chr")
|
@@ -983,14 +972,13 @@ class UtaDatabase:
|
|
983
972
|
elif liftover_to_assembly == Assembly.GRCH37:
|
984
973
|
liftover = self.liftover_38_to_37.convert_coordinate(chromosome, pos)
|
985
974
|
else:
|
986
|
-
logger.warning(
|
975
|
+
logger.warning("%s assembly not supported", liftover_to_assembly)
|
987
976
|
liftover = None
|
988
977
|
|
989
|
-
if
|
990
|
-
logger.warning(
|
978
|
+
if not liftover:
|
979
|
+
logger.warning("%s does not exist on %s", pos, chromosome)
|
991
980
|
return None
|
992
|
-
|
993
|
-
return liftover[0]
|
981
|
+
return liftover[0][:2]
|
994
982
|
|
995
983
|
def _set_liftover(
|
996
984
|
self,
|
@@ -1012,20 +1000,22 @@ class UtaDatabase:
|
|
1012
1000
|
)
|
1013
1001
|
if liftover_start_i is None:
|
1014
1002
|
logger.warning(
|
1015
|
-
|
1016
|
-
|
1003
|
+
"Unable to liftover position %s on %s",
|
1004
|
+
genomic_tx_data[key][0],
|
1005
|
+
chromosome,
|
1017
1006
|
)
|
1018
|
-
return
|
1007
|
+
return
|
1019
1008
|
|
1020
1009
|
liftover_end_i = self.get_liftover(
|
1021
1010
|
chromosome, genomic_tx_data[key][1], liftover_to_assembly
|
1022
1011
|
)
|
1023
1012
|
if liftover_end_i is None:
|
1024
1013
|
logger.warning(
|
1025
|
-
|
1026
|
-
|
1014
|
+
"Unable to liftover position %s on %s",
|
1015
|
+
genomic_tx_data[key][1],
|
1016
|
+
chromosome,
|
1027
1017
|
)
|
1028
|
-
return
|
1018
|
+
return
|
1029
1019
|
|
1030
1020
|
genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
|
1031
1021
|
|
@@ -1051,7 +1041,7 @@ class UtaDatabase:
|
|
1051
1041
|
FROM {self.schema}.associated_accessions
|
1052
1042
|
WHERE pro_ac = '{p_ac}'
|
1053
1043
|
{order_by_cond}
|
1054
|
-
"""
|
1044
|
+
""" # noqa: S608
|
1055
1045
|
result = await self.execute_query(query)
|
1056
1046
|
if result:
|
1057
1047
|
result = [r["tx_ac"] for r in result]
|
@@ -1072,7 +1062,7 @@ class UtaDatabase:
|
|
1072
1062
|
WHERE alt_ac = '{alt_ac}'
|
1073
1063
|
AND {g_pos} BETWEEN alt_start_i AND alt_end_i
|
1074
1064
|
AND tx_ac LIKE 'NM_%';
|
1075
|
-
"""
|
1065
|
+
""" # noqa: S608
|
1076
1066
|
results = await self.execute_query(query)
|
1077
1067
|
if not results:
|
1078
1068
|
return []
|
@@ -1092,35 +1082,26 @@ class UtaDatabase:
|
|
1092
1082
|
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
|
1093
1083
|
except ClientError as e:
|
1094
1084
|
logger.warning(e)
|
1095
|
-
if e.response["Error"]["Code"]
|
1096
|
-
# Secrets Manager can"t decrypt the protected
|
1097
|
-
|
1098
|
-
raise e
|
1099
|
-
elif e.response["Error"]["Code"] == "InternalServiceErrorException":
|
1085
|
+
if e.response["Error"]["Code"] in {
|
1086
|
+
# Secrets Manager can"t decrypt the protected secret text using the provided KMS key.
|
1087
|
+
"DecryptionFailureException",
|
1100
1088
|
# An error occurred on the server side.
|
1101
|
-
|
1102
|
-
elif e.response["Error"]["Code"] == "InvalidParameterException":
|
1089
|
+
"InternalServiceErrorException",
|
1103
1090
|
# You provided an invalid value for a parameter.
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
# the current state of the resource.
|
1108
|
-
raise e
|
1109
|
-
elif e.response["Error"]["Code"] == "ResourceNotFoundException":
|
1091
|
+
"InvalidParameterException",
|
1092
|
+
# You provided a parameter value that is not valid for the current state of the resource.
|
1093
|
+
"InvalidRequestException",
|
1110
1094
|
# We can"t find the resource that you asked for.
|
1095
|
+
"ResourceNotFoundException",
|
1096
|
+
}:
|
1111
1097
|
raise e
|
1112
1098
|
else:
|
1113
1099
|
# Decrypts secret using the associated KMS CMK.
|
1114
1100
|
# Depending on whether the secret is a string or binary,
|
1115
1101
|
# one of these fields will be populated.
|
1116
1102
|
if "SecretString" in get_secret_value_response:
|
1117
|
-
|
1118
|
-
|
1119
|
-
else:
|
1120
|
-
decoded_binary_secret = base64.b64decode(
|
1121
|
-
get_secret_value_response["SecretBinary"]
|
1122
|
-
)
|
1123
|
-
return decoded_binary_secret
|
1103
|
+
return get_secret_value_response["SecretString"]
|
1104
|
+
return base64.b64decode(get_secret_value_response["SecretBinary"])
|
1124
1105
|
|
1125
1106
|
|
1126
1107
|
class ParseResult(UrlLibParseResult):
|
@@ -1129,9 +1110,9 @@ class ParseResult(UrlLibParseResult):
|
|
1129
1110
|
Source: https://github.com/biocommons/hgvs
|
1130
1111
|
"""
|
1131
1112
|
|
1132
|
-
def __new__(cls, pr): # noqa
|
1113
|
+
def __new__(cls, pr): # noqa: ANN001, ANN204
|
1133
1114
|
"""Create new instance."""
|
1134
|
-
return super(ParseResult, cls).__new__(cls, *pr)
|
1115
|
+
return super(ParseResult, cls).__new__(cls, *pr) # noqa: UP008
|
1135
1116
|
|
1136
1117
|
@property
|
1137
1118
|
def database(self) -> Optional[str]:
|
cool_seq_tool/utils.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
"""Provide a small set of general helper functions."""
|
2
|
+
import datetime
|
2
3
|
import logging
|
3
|
-
from datetime import datetime
|
4
4
|
from typing import Tuple
|
5
5
|
|
6
6
|
from cool_seq_tool.schemas import ResidueMode, ServiceMeta
|
@@ -43,4 +43,7 @@ def service_meta() -> ServiceMeta:
|
|
43
43
|
|
44
44
|
:return: ServiceMeta object
|
45
45
|
"""
|
46
|
-
return ServiceMeta(
|
46
|
+
return ServiceMeta(
|
47
|
+
version=__version__,
|
48
|
+
response_datetime=datetime.datetime.now(tz=datetime.timezone.utc),
|
49
|
+
)
|
cool_seq_tool/version.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
"""Define package version."""
|
2
|
-
__version__ = "0.4.0-
|
2
|
+
__version__ = "0.4.0-dev2"
|