cool-seq-tool 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +6 -0
- cool_seq_tool/app.py +1 -2
- cool_seq_tool/handlers/seqrepo_access.py +5 -5
- cool_seq_tool/mappers/alignment.py +16 -16
- cool_seq_tool/mappers/exon_genomic_coords.py +845 -628
- cool_seq_tool/mappers/mane_transcript.py +109 -104
- cool_seq_tool/schemas.py +30 -165
- cool_seq_tool/sources/uta_database.py +149 -229
- cool_seq_tool/utils.py +9 -9
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/METADATA +8 -8
- cool_seq_tool-0.7.0.dist-info/RECORD +24 -0
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/WHEEL +1 -1
- cool_seq_tool-0.6.0.dist-info/RECORD +0 -24
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/top_level.txt +0 -0
cool_seq_tool/schemas.py
CHANGED
@@ -9,7 +9,6 @@ from pydantic import (
|
|
9
9
|
ConfigDict,
|
10
10
|
StrictInt,
|
11
11
|
StrictStr,
|
12
|
-
model_validator,
|
13
12
|
)
|
14
13
|
|
15
14
|
from cool_seq_tool import __version__
|
@@ -20,9 +19,9 @@ _now = str(datetime.datetime.now(tz=datetime.timezone.utc))
|
|
20
19
|
class AnnotationLayer(str, Enum):
|
21
20
|
"""Create enum for supported annotation layers"""
|
22
21
|
|
23
|
-
PROTEIN
|
24
|
-
CDNA
|
25
|
-
GENOMIC
|
22
|
+
PROTEIN = "p"
|
23
|
+
CDNA = "c"
|
24
|
+
GENOMIC = "g"
|
26
25
|
|
27
26
|
|
28
27
|
class Strand(IntEnum):
|
@@ -53,15 +52,17 @@ class TranscriptPriority(str, Enum):
|
|
53
52
|
GRCH38 = "grch38"
|
54
53
|
|
55
54
|
|
56
|
-
class
|
57
|
-
"""Create Enum for
|
55
|
+
class CoordinateType(str, Enum):
|
56
|
+
"""Create Enum for coordinate types.
|
58
57
|
|
59
|
-
|
58
|
+
It is preferred to operate in inter-residue coordinates, but users should be
|
60
59
|
careful to define the coordinate mode of their data when calling ``cool-seq-tool``
|
61
60
|
functions.
|
62
61
|
|
62
|
+
``RESIDUE`` means 1-indexed, residue coordinates and ``INTER_RESIDUE`` means
|
63
|
+
0-indexed, inter-residue coordinates.
|
64
|
+
|
63
65
|
| | C | | T | | G | |
|
64
|
-
ZERO | | 0 | | 1 | | 2 | |
|
65
66
|
RESIDUE | | 1 | | 2 | | 3 | |
|
66
67
|
INTER_RESIDUE | 0 | | 1 | | 2 | | 3 |
|
67
68
|
|
@@ -77,14 +78,6 @@ class ResidueMode(str, Enum):
|
|
77
78
|
-
|
78
79
|
- G
|
79
80
|
-
|
80
|
-
* - ``ZERO``
|
81
|
-
-
|
82
|
-
- 0
|
83
|
-
-
|
84
|
-
- 1
|
85
|
-
-
|
86
|
-
- 2
|
87
|
-
-
|
88
81
|
* - ``RESIDUE``
|
89
82
|
-
|
90
83
|
- 1
|
@@ -107,7 +100,6 @@ class ResidueMode(str, Enum):
|
|
107
100
|
`Variation Representation Schema (VRS) paper <https://www.ncbi.nlm.nih.gov/pmc/articles/pmid/35311178/>`_ for further discussion.
|
108
101
|
"""
|
109
102
|
|
110
|
-
ZERO = "zero"
|
111
103
|
RESIDUE = "residue"
|
112
104
|
INTER_RESIDUE = "inter-residue"
|
113
105
|
|
@@ -116,97 +108,35 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
|
|
116
108
|
"""Base Pydantic model class with extra values forbidden."""
|
117
109
|
|
118
110
|
|
119
|
-
class
|
120
|
-
"""
|
121
|
-
|
122
|
-
ncbi_gene_id: StrictInt
|
123
|
-
hgnc_id: StrictInt | None
|
124
|
-
symbol: StrictStr
|
111
|
+
class GenomicTxData(BaseModelForbidExtra):
|
112
|
+
"""Represent aligned genomic/transcript exon data"""
|
125
113
|
|
126
|
-
|
127
|
-
class TranscriptExonData(BaseModelForbidExtra):
|
128
|
-
"""Model containing transcript exon data."""
|
129
|
-
|
130
|
-
transcript: StrictStr
|
131
|
-
pos: StrictInt
|
132
|
-
exon: StrictInt
|
133
|
-
exon_offset: StrictInt = 0
|
134
|
-
gene: StrictStr
|
135
|
-
chr: StrictStr
|
114
|
+
gene: str
|
136
115
|
strand: Strand
|
116
|
+
tx_pos_range: tuple[int, int]
|
117
|
+
alt_pos_range: tuple[int, int]
|
118
|
+
alt_aln_method: str
|
119
|
+
tx_exon_id: int
|
120
|
+
alt_exon_id: int
|
137
121
|
|
138
|
-
model_config = ConfigDict(
|
139
|
-
json_schema_extra={
|
140
|
-
"example": {
|
141
|
-
"chr": "NC_000001.11",
|
142
|
-
"gene": "TPM3",
|
143
|
-
"pos": 154192135,
|
144
|
-
"exon": 1,
|
145
|
-
"exon_offset": 0,
|
146
|
-
"transcript": "NM_152263.3",
|
147
|
-
"strand": Strand.NEGATIVE,
|
148
|
-
}
|
149
|
-
}
|
150
|
-
)
|
151
122
|
|
123
|
+
class GenomicTxMetadata(GenomicTxData):
|
124
|
+
"""Store relevant metadata for genomic and transcript accessions"""
|
152
125
|
|
153
|
-
|
154
|
-
|
126
|
+
tx_ac: str
|
127
|
+
alt_ac: str
|
128
|
+
coding_start_site: int = 0
|
129
|
+
coding_end_site: int = 0
|
130
|
+
alt_pos_change_range: tuple[int, int]
|
131
|
+
pos_change: tuple[int, int] | None
|
155
132
|
|
156
|
-
gene: StrictStr
|
157
|
-
chr: StrictStr
|
158
|
-
start: StrictInt | None = None # Genomic start position
|
159
|
-
end: StrictInt | None = None # Genomic end position
|
160
|
-
exon_start: StrictInt | None = None
|
161
|
-
exon_start_offset: StrictInt | None = 0
|
162
|
-
exon_end: StrictInt | None = None
|
163
|
-
exon_end_offset: StrictInt | None = 0
|
164
|
-
transcript: StrictStr
|
165
|
-
strand: Strand
|
166
133
|
|
167
|
-
|
168
|
-
|
169
|
-
"""Check that at least one of {``start``, ``end``} is set.
|
170
|
-
Check that at least one of {``exon_start``, ``exon_end``} is set.
|
171
|
-
If not set, set corresponding offset to ``None``
|
172
|
-
"""
|
173
|
-
start = values.start
|
174
|
-
end = values.end
|
175
|
-
if not start and not end:
|
176
|
-
msg = "Missing values for `start` or `end`"
|
177
|
-
raise ValueError(msg)
|
178
|
-
|
179
|
-
if start:
|
180
|
-
if not values.exon_start:
|
181
|
-
msg = "Missing value `exon_start`"
|
182
|
-
raise ValueError(msg)
|
183
|
-
else:
|
184
|
-
values.exon_start_offset = None
|
185
|
-
|
186
|
-
if end:
|
187
|
-
if not values.exon_end:
|
188
|
-
msg = "Missing value `exon_end`"
|
189
|
-
raise ValueError(msg)
|
190
|
-
else:
|
191
|
-
values.exon_end_offset = None
|
192
|
-
return values
|
134
|
+
class ManeGeneData(BaseModel, extra="forbid"):
|
135
|
+
"""Define minimal object model for representing a MANE gene"""
|
193
136
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
"gene": "TPM3",
|
198
|
-
"chr": "NC_000001.11",
|
199
|
-
"start": 154192135,
|
200
|
-
"end": None,
|
201
|
-
"exon_start": 1,
|
202
|
-
"exon_end": None,
|
203
|
-
"exon_start_offset": 0,
|
204
|
-
"exon_end_offset": None,
|
205
|
-
"transcript": "NM_152263.3",
|
206
|
-
"strand": Strand.NEGATIVE,
|
207
|
-
}
|
208
|
-
}
|
209
|
-
)
|
137
|
+
ncbi_gene_id: StrictInt
|
138
|
+
hgnc_id: StrictInt | None
|
139
|
+
symbol: StrictStr
|
210
140
|
|
211
141
|
|
212
142
|
class ServiceMeta(BaseModelForbidExtra):
|
@@ -229,68 +159,3 @@ class ServiceMeta(BaseModelForbidExtra):
|
|
229
159
|
}
|
230
160
|
}
|
231
161
|
)
|
232
|
-
|
233
|
-
|
234
|
-
class TranscriptExonDataResponse(BaseModelForbidExtra):
|
235
|
-
"""Response model for Transcript Exon Data"""
|
236
|
-
|
237
|
-
transcript_exon_data: TranscriptExonData | None = None
|
238
|
-
warnings: list[StrictStr] = []
|
239
|
-
service_meta: ServiceMeta
|
240
|
-
|
241
|
-
model_config = ConfigDict(
|
242
|
-
json_schema_extra={
|
243
|
-
"example": {
|
244
|
-
"transcript_exon_data": {
|
245
|
-
"chr": "NC_000001.11",
|
246
|
-
"gene": "TPM3",
|
247
|
-
"pos": 154192135,
|
248
|
-
"exon": 1,
|
249
|
-
"exon_offset": 0,
|
250
|
-
"transcript": "NM_152263.3",
|
251
|
-
"strand": Strand.NEGATIVE,
|
252
|
-
},
|
253
|
-
"warnings": [],
|
254
|
-
"service_meta": {
|
255
|
-
"name": "cool_seq_tool",
|
256
|
-
"version": __version__,
|
257
|
-
"response_datetime": _now,
|
258
|
-
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
259
|
-
},
|
260
|
-
}
|
261
|
-
}
|
262
|
-
)
|
263
|
-
|
264
|
-
|
265
|
-
class GenomicDataResponse(BaseModelForbidExtra):
|
266
|
-
"""Response model for Genomic Data"""
|
267
|
-
|
268
|
-
genomic_data: GenomicData | None = None
|
269
|
-
warnings: list[StrictStr] = []
|
270
|
-
service_meta: ServiceMeta
|
271
|
-
|
272
|
-
model_config = ConfigDict(
|
273
|
-
json_schema_extra={
|
274
|
-
"example": {
|
275
|
-
"genomic_data": {
|
276
|
-
"gene": "TPM3",
|
277
|
-
"chr": "NC_000001.11",
|
278
|
-
"start": 154192135,
|
279
|
-
"end": None,
|
280
|
-
"exon_start": 1,
|
281
|
-
"exon_end": None,
|
282
|
-
"exon_start_offset": 0,
|
283
|
-
"exon_end_offset": None,
|
284
|
-
"transcript": "NM_152263.3",
|
285
|
-
"strand": Strand.NEGATIVE,
|
286
|
-
},
|
287
|
-
"warnings": [],
|
288
|
-
"service_meta": {
|
289
|
-
"name": "cool_seq_tool",
|
290
|
-
"version": __version__,
|
291
|
-
"response_datetime": _now,
|
292
|
-
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
293
|
-
},
|
294
|
-
}
|
295
|
-
}
|
296
|
-
)
|