cool-seq-tool 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/alignment.py +1 -1
- cool_seq_tool/mappers/exon_genomic_coords.py +28 -0
- cool_seq_tool/mappers/feature_overlap.py +6 -5
- cool_seq_tool/mappers/mane_transcript.py +17 -10
- cool_seq_tool/resources/status.py +6 -1
- cool_seq_tool/schemas.py +1 -1
- cool_seq_tool/utils.py +1 -1
- {cool_seq_tool-0.14.2.dist-info → cool_seq_tool-0.14.4.dist-info}/METADATA +3 -3
- {cool_seq_tool-0.14.2.dist-info → cool_seq_tool-0.14.4.dist-info}/RECORD +12 -12
- {cool_seq_tool-0.14.2.dist-info → cool_seq_tool-0.14.4.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.14.2.dist-info → cool_seq_tool-0.14.4.dist-info}/licenses/LICENSE +0 -0
- {cool_seq_tool-0.14.2.dist-info → cool_seq_tool-0.14.4.dist-info}/top_level.txt +0 -0
@@ -106,7 +106,7 @@ class AlignmentMapper:
|
|
106
106
|
c_end_pos: int,
|
107
107
|
cds_start: int | None = None,
|
108
108
|
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
109
|
-
target_genome_assembly:
|
109
|
+
target_genome_assembly: Assembly = Assembly.GRCH38,
|
110
110
|
) -> tuple[dict | None, str | None]:
|
111
111
|
"""Translate cDNA representation to genomic representation
|
112
112
|
|
@@ -65,6 +65,27 @@ class TxSegment(BaseModelForbidExtra):
|
|
65
65
|
genomic_location: SequenceLocation = Field(
|
66
66
|
..., description="The genomic position of a transcript segment."
|
67
67
|
)
|
68
|
+
is_exonic: bool = Field(
|
69
|
+
default=True, description="If the position occurs on an exon"
|
70
|
+
)
|
71
|
+
|
72
|
+
@model_validator(mode="before")
|
73
|
+
def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
|
74
|
+
"""Ensure that only one of `start` or `end` is set in the
|
75
|
+
genomic_location field
|
76
|
+
|
77
|
+
:param values: The values in the TxSegment class
|
78
|
+
:raises ValueError: If `start` and `end` are both set in
|
79
|
+
`genomic_location`
|
80
|
+
:return: Values in model
|
81
|
+
"""
|
82
|
+
loc = values.get("genomic_location")
|
83
|
+
start = getattr(loc, "start", None)
|
84
|
+
end = getattr(loc, "end", None)
|
85
|
+
if start and end:
|
86
|
+
err_msg = "Only one of `start` or `end` may be set as this describes the start or end of a transcript segment"
|
87
|
+
raise ValueError(err_msg)
|
88
|
+
return values
|
68
89
|
|
69
90
|
model_config = ConfigDict(
|
70
91
|
json_schema_extra={
|
@@ -79,6 +100,7 @@ class TxSegment(BaseModelForbidExtra):
|
|
79
100
|
},
|
80
101
|
"end": 154192135,
|
81
102
|
},
|
103
|
+
"is_exonic": True,
|
82
104
|
}
|
83
105
|
}
|
84
106
|
)
|
@@ -136,6 +158,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
136
158
|
},
|
137
159
|
"end": 154192135,
|
138
160
|
},
|
161
|
+
"is_exonic": True,
|
139
162
|
},
|
140
163
|
"errors": [],
|
141
164
|
}
|
@@ -202,6 +225,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
202
225
|
},
|
203
226
|
"end": 154192135,
|
204
227
|
},
|
228
|
+
"is_exonic": True,
|
205
229
|
},
|
206
230
|
"seg_end": {
|
207
231
|
"exon_ord": 7,
|
@@ -214,6 +238,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
214
238
|
},
|
215
239
|
"start": 154170399,
|
216
240
|
},
|
241
|
+
"is_exonic": True,
|
217
242
|
},
|
218
243
|
}
|
219
244
|
}
|
@@ -895,6 +920,7 @@ class ExonGenomicCoordsMapper:
|
|
895
920
|
# Check if breakpoint occurs on an exon.
|
896
921
|
# If not, determine the adjacent exon given the selected transcript
|
897
922
|
if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
|
923
|
+
is_exonic = False
|
898
924
|
exon_num = self._get_adjacent_exon(
|
899
925
|
tx_exons_genomic_coords=tx_exons,
|
900
926
|
strand=strand,
|
@@ -902,6 +928,7 @@ class ExonGenomicCoordsMapper:
|
|
902
928
|
end=genomic_pos if not is_seg_start else None,
|
903
929
|
)
|
904
930
|
else:
|
931
|
+
is_exonic = True
|
905
932
|
exon_data = await self.uta_db.get_tx_exon_aln_v_data(
|
906
933
|
transcript,
|
907
934
|
genomic_pos,
|
@@ -934,6 +961,7 @@ class ExonGenomicCoordsMapper:
|
|
934
961
|
exon_ord=exon_num,
|
935
962
|
offset=offset,
|
936
963
|
genomic_location=genomic_location,
|
964
|
+
is_exonic=is_exonic,
|
937
965
|
),
|
938
966
|
)
|
939
967
|
|
@@ -212,14 +212,15 @@ class FeatureOverlap:
|
|
212
212
|
|
213
213
|
ga4gh_seq_id = ga4gh_aliases[0]
|
214
214
|
|
215
|
-
def _get_seq_loc(
|
216
|
-
|
215
|
+
def _get_seq_loc(
|
216
|
+
start_pos: int, stop_pos: int, refget_ac: str
|
217
|
+
) -> SequenceLocation:
|
218
|
+
"""Get VRS Sequence Location
|
217
219
|
|
218
220
|
:param start_pos: Start position
|
219
221
|
:param stop_pos: Stop position
|
220
222
|
:param refget_ac: Refget Accession (SQ.)
|
221
|
-
:return: VRS Sequence Location
|
222
|
-
included
|
223
|
+
:return: VRS Sequence Location
|
223
224
|
"""
|
224
225
|
_sl = SequenceLocation(
|
225
226
|
sequenceReference=SequenceReference(
|
@@ -229,7 +230,7 @@ class FeatureOverlap:
|
|
229
230
|
end=stop_pos,
|
230
231
|
)
|
231
232
|
ga4gh_identify(_sl)
|
232
|
-
return _sl
|
233
|
+
return _sl
|
233
234
|
|
234
235
|
resp = {}
|
235
236
|
refget_ac = ga4gh_seq_id.split("ga4gh:")[-1]
|
@@ -55,7 +55,7 @@ class DataRepresentation(BaseModel):
|
|
55
55
|
"""Define object model for final output representation"""
|
56
56
|
|
57
57
|
gene: str | None = None
|
58
|
-
refseq: str
|
58
|
+
refseq: str | None = None
|
59
59
|
ensembl: str | None = None
|
60
60
|
pos: tuple[int, int]
|
61
61
|
strand: Strand
|
@@ -447,7 +447,7 @@ class ManeTranscript:
|
|
447
447
|
|
448
448
|
async def _g_to_c(
|
449
449
|
self,
|
450
|
-
g:
|
450
|
+
g: GenomicTxMetadata,
|
451
451
|
refseq_c_ac: str,
|
452
452
|
status: TranscriptPriority,
|
453
453
|
ensembl_c_ac: str | None = None,
|
@@ -590,16 +590,23 @@ class ManeTranscript:
|
|
590
590
|
if mane_transcript:
|
591
591
|
mane_start_pos = mane_transcript.pos[0]
|
592
592
|
mane_end_pos = mane_transcript.pos[1]
|
593
|
-
if anno == AnnotationLayer.CDNA
|
593
|
+
if anno == AnnotationLayer.CDNA and isinstance(
|
594
|
+
mane_transcript, CdnaRepresentation
|
595
|
+
):
|
594
596
|
mane_cds = mane_transcript.coding_start_site
|
595
597
|
mane_start_pos += mane_cds
|
596
598
|
mane_end_pos += mane_cds
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
599
|
+
|
600
|
+
if mane_transcript.refseq:
|
601
|
+
mane_ref, _ = self.seqrepo_access.get_reference_sequence(
|
602
|
+
mane_transcript.refseq,
|
603
|
+
start=mane_start_pos,
|
604
|
+
end=mane_end_pos if mane_start_pos != mane_end_pos else None,
|
605
|
+
coordinate_type=coordinate_type,
|
606
|
+
)
|
607
|
+
else:
|
608
|
+
mane_ref = None
|
609
|
+
|
603
610
|
if not mane_ref:
|
604
611
|
_logger.info("Unable to validate reference for MANE Transcript")
|
605
612
|
|
@@ -1330,7 +1337,7 @@ class ManeTranscript:
|
|
1330
1337
|
gene: str | None = None,
|
1331
1338
|
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
1332
1339
|
try_longest_compatible: bool = False,
|
1333
|
-
) ->
|
1340
|
+
) -> ProteinAndCdnaRepresentation | None:
|
1334
1341
|
"""Given GRCh38 genomic representation, return protein representation.
|
1335
1342
|
|
1336
1343
|
Will try MANE Select and then MANE Plus Clinical. If neither is found and
|
@@ -24,6 +24,7 @@ ResourceStatus = namedtuple(
|
|
24
24
|
DataFile.TRANSCRIPT_MAPPINGS.lower(),
|
25
25
|
DataFile.MANE_SUMMARY.lower(),
|
26
26
|
DataFile.LRG_REFSEQGENE.lower(),
|
27
|
+
DataFile.MANE_REFSEQ_GENOMIC.lower(),
|
27
28
|
"liftover",
|
28
29
|
),
|
29
30
|
)
|
@@ -37,6 +38,7 @@ async def check_status(
|
|
37
38
|
sr: SeqRepo | None = None,
|
38
39
|
chain_file_37_to_38: str | None = None,
|
39
40
|
chain_file_38_to_37: str | None = None,
|
41
|
+
mane_refseq_genomic_path: str | None = None,
|
40
42
|
) -> ResourceStatus:
|
41
43
|
"""Perform basic status checks on availability of required data resources.
|
42
44
|
|
@@ -62,6 +64,7 @@ async def check_status(
|
|
62
64
|
is used for ``agct``. If this is not provided, will check to see if
|
63
65
|
``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will allow
|
64
66
|
``agct`` to download a chain file from UCSC
|
67
|
+
:param mane_refseq_genomic_path: Optional path to MANE RefSeq Genomic GFF data
|
65
68
|
:return: boolean description of availability of each resource, given current
|
66
69
|
environment configurations
|
67
70
|
"""
|
@@ -69,19 +72,21 @@ async def check_status(
|
|
69
72
|
DataFile.TRANSCRIPT_MAPPINGS.lower(): transcript_file_path,
|
70
73
|
DataFile.LRG_REFSEQGENE.lower(): lrg_refseqgene_path,
|
71
74
|
DataFile.MANE_SUMMARY.lower(): mane_data_path,
|
75
|
+
DataFile.MANE_REFSEQ_GENOMIC.lower(): mane_refseq_genomic_path,
|
72
76
|
}
|
73
77
|
|
74
78
|
status = {
|
75
79
|
DataFile.TRANSCRIPT_MAPPINGS.lower(): False,
|
76
80
|
DataFile.LRG_REFSEQGENE.lower(): False,
|
77
81
|
DataFile.MANE_SUMMARY.lower(): False,
|
82
|
+
DataFile.MANE_REFSEQ_GENOMIC.lower(): False,
|
78
83
|
"liftover": False,
|
79
84
|
"uta": False,
|
80
85
|
"seqrepo": False,
|
81
86
|
}
|
82
87
|
for r in list(DataFile):
|
83
88
|
name_lower = r.lower()
|
84
|
-
declared_path = file_path_params
|
89
|
+
declared_path = file_path_params.get(name_lower)
|
85
90
|
if declared_path and declared_path.exists() and declared_path.is_file():
|
86
91
|
status[name_lower] = True
|
87
92
|
continue
|
cool_seq_tool/schemas.py
CHANGED
cool_seq_tool/utils.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.14.2
|
3
|
+
Version: 0.14.4
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -38,10 +38,10 @@ Classifier: Intended Audience :: Developers
|
|
38
38
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
39
39
|
Classifier: License :: OSI Approved :: MIT License
|
40
40
|
Classifier: Programming Language :: Python :: 3
|
41
|
-
Classifier: Programming Language :: Python :: 3.10
|
42
41
|
Classifier: Programming Language :: Python :: 3.11
|
43
42
|
Classifier: Programming Language :: Python :: 3.12
|
44
|
-
|
43
|
+
Classifier: Programming Language :: Python :: 3.13
|
44
|
+
Requires-Python: >=3.11
|
45
45
|
Description-Content-Type: text/markdown
|
46
46
|
License-File: LICENSE
|
47
47
|
Requires-Dist: asyncpg
|
@@ -1,25 +1,25 @@
|
|
1
1
|
cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
|
2
2
|
cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
|
3
|
-
cool_seq_tool/schemas.py,sha256=
|
4
|
-
cool_seq_tool/utils.py,sha256=
|
3
|
+
cool_seq_tool/schemas.py,sha256=6c87iuA6v7BX7a8nkWEqFbJTksFysuuIeuYxkNCrAsI,5356
|
4
|
+
cool_seq_tool/utils.py,sha256=jra2ZHS7HUqXqabSvyqd5imf6kkhYL8nQd20BWNLpb8,2950
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
|
8
|
-
cool_seq_tool/mappers/alignment.py,sha256=
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
10
|
-
cool_seq_tool/mappers/feature_overlap.py,sha256=
|
8
|
+
cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=fV4LyrpHPLRrx6AtV15g93q5XCH3i-y3Wj9tl-Cg8mM,45845
|
10
|
+
cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
|
11
11
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
12
|
-
cool_seq_tool/mappers/mane_transcript.py,sha256=
|
12
|
+
cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
|
13
13
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
14
14
|
cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QNCrmRRDXw,4205
|
15
|
-
cool_seq_tool/resources/status.py,sha256=
|
15
|
+
cool_seq_tool/resources/status.py,sha256=5UKx5FIQuyIY7FU4kSinDIM4MhLpr9_MiQDDBNt9kRo,5990
|
16
16
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
17
17
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
18
18
|
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
|
19
19
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
20
20
|
cool_seq_tool/sources/uta_database.py,sha256=zzRzmYuybqzEg7zeuQjhK46SPK5GfbiWWNRGNJju8AI,36197
|
21
|
-
cool_seq_tool-0.14.
|
22
|
-
cool_seq_tool-0.14.
|
23
|
-
cool_seq_tool-0.14.
|
24
|
-
cool_seq_tool-0.14.
|
25
|
-
cool_seq_tool-0.14.
|
21
|
+
cool_seq_tool-0.14.4.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
22
|
+
cool_seq_tool-0.14.4.dist-info/METADATA,sha256=gPz48irXCwNUecTcgpKrdrHiBhi8R_Is55S2UYs9Qtk,6535
|
23
|
+
cool_seq_tool-0.14.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
24
|
+
cool_seq_tool-0.14.4.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
25
|
+
cool_seq_tool-0.14.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|