cool-seq-tool 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
cool_seq_tool/schemas.py CHANGED
@@ -9,7 +9,6 @@ from pydantic import (
9
9
  ConfigDict,
10
10
  StrictInt,
11
11
  StrictStr,
12
- model_validator,
13
12
  )
14
13
 
15
14
  from cool_seq_tool import __version__
@@ -20,9 +19,9 @@ _now = str(datetime.datetime.now(tz=datetime.timezone.utc))
20
19
  class AnnotationLayer(str, Enum):
21
20
  """Create enum for supported annotation layers"""
22
21
 
23
- PROTEIN: Literal["p"] = "p"
24
- CDNA: Literal["c"] = "c"
25
- GENOMIC: Literal["g"] = "g"
22
+ PROTEIN = "p"
23
+ CDNA = "c"
24
+ GENOMIC = "g"
26
25
 
27
26
 
28
27
  class Strand(IntEnum):
@@ -53,15 +52,17 @@ class TranscriptPriority(str, Enum):
53
52
  GRCH38 = "grch38"
54
53
 
55
54
 
56
- class ResidueMode(str, Enum):
57
- """Create Enum for residue modes.
55
+ class CoordinateType(str, Enum):
56
+ """Create Enum for coordinate types.
58
57
 
59
- We typically prefer to operate in inter-residue coordinates, but users should be
58
+ It is preferred to operate in inter-residue coordinates, but users should be
60
59
  careful to define the coordinate mode of their data when calling ``cool-seq-tool``
61
60
  functions.
62
61
 
62
+ ``RESIDUE`` means 1-indexed, residue coordinates and ``INTER_RESIDUE`` means
63
+ 0-indexed, inter-residue coordinates.
64
+
63
65
  | | C | | T | | G | |
64
- ZERO | | 0 | | 1 | | 2 | |
65
66
  RESIDUE | | 1 | | 2 | | 3 | |
66
67
  INTER_RESIDUE | 0 | | 1 | | 2 | | 3 |
67
68
 
@@ -77,14 +78,6 @@ class ResidueMode(str, Enum):
77
78
  -
78
79
  - G
79
80
  -
80
- * - ``ZERO``
81
- -
82
- - 0
83
- -
84
- - 1
85
- -
86
- - 2
87
- -
88
81
  * - ``RESIDUE``
89
82
  -
90
83
  - 1
@@ -107,7 +100,6 @@ class ResidueMode(str, Enum):
107
100
  `Variation Representation Schema (VRS) paper <https://www.ncbi.nlm.nih.gov/pmc/articles/pmid/35311178/>`_ for further discussion.
108
101
  """
109
102
 
110
- ZERO = "zero"
111
103
  RESIDUE = "residue"
112
104
  INTER_RESIDUE = "inter-residue"
113
105
 
@@ -116,97 +108,35 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
116
108
  """Base Pydantic model class with extra values forbidden."""
117
109
 
118
110
 
119
- class ManeGeneData(BaseModel, extra="forbid"):
120
- """Define minimal object model for representing a MANE gene"""
121
-
122
- ncbi_gene_id: StrictInt
123
- hgnc_id: StrictInt | None
124
- symbol: StrictStr
111
+ class GenomicTxData(BaseModelForbidExtra):
112
+ """Represent aligned genomic/transcript exon data"""
125
113
 
126
-
127
- class TranscriptExonData(BaseModelForbidExtra):
128
- """Model containing transcript exon data."""
129
-
130
- transcript: StrictStr
131
- pos: StrictInt
132
- exon: StrictInt
133
- exon_offset: StrictInt = 0
134
- gene: StrictStr
135
- chr: StrictStr
114
+ gene: str
136
115
  strand: Strand
116
+ tx_pos_range: tuple[int, int]
117
+ alt_pos_range: tuple[int, int]
118
+ alt_aln_method: str
119
+ tx_exon_id: int
120
+ alt_exon_id: int
137
121
 
138
- model_config = ConfigDict(
139
- json_schema_extra={
140
- "example": {
141
- "chr": "NC_000001.11",
142
- "gene": "TPM3",
143
- "pos": 154192135,
144
- "exon": 1,
145
- "exon_offset": 0,
146
- "transcript": "NM_152263.3",
147
- "strand": Strand.NEGATIVE,
148
- }
149
- }
150
- )
151
122
 
123
+ class GenomicTxMetadata(GenomicTxData):
124
+ """Store relevant metadata for genomic and transcript accessions"""
152
125
 
153
- class GenomicData(BaseModelForbidExtra):
154
- """Model containing genomic and transcript exon data."""
126
+ tx_ac: str
127
+ alt_ac: str
128
+ coding_start_site: int = 0
129
+ coding_end_site: int = 0
130
+ alt_pos_change_range: tuple[int, int]
131
+ pos_change: tuple[int, int] | None
155
132
 
156
- gene: StrictStr
157
- chr: StrictStr
158
- start: StrictInt | None = None # Genomic start position
159
- end: StrictInt | None = None # Genomic end position
160
- exon_start: StrictInt | None = None
161
- exon_start_offset: StrictInt | None = 0
162
- exon_end: StrictInt | None = None
163
- exon_end_offset: StrictInt | None = 0
164
- transcript: StrictStr
165
- strand: Strand
166
133
 
167
- @model_validator(mode="after")
168
- def check_start_end(cls, values):
169
- """Check that at least one of {``start``, ``end``} is set.
170
- Check that at least one of {``exon_start``, ``exon_end``} is set.
171
- If not set, set corresponding offset to ``None``
172
- """
173
- start = values.start
174
- end = values.end
175
- if not start and not end:
176
- msg = "Missing values for `start` or `end`"
177
- raise ValueError(msg)
178
-
179
- if start:
180
- if not values.exon_start:
181
- msg = "Missing value `exon_start`"
182
- raise ValueError(msg)
183
- else:
184
- values.exon_start_offset = None
185
-
186
- if end:
187
- if not values.exon_end:
188
- msg = "Missing value `exon_end`"
189
- raise ValueError(msg)
190
- else:
191
- values.exon_end_offset = None
192
- return values
134
+ class ManeGeneData(BaseModel, extra="forbid"):
135
+ """Define minimal object model for representing a MANE gene"""
193
136
 
194
- model_config = ConfigDict(
195
- json_schema_extra={
196
- "example": {
197
- "gene": "TPM3",
198
- "chr": "NC_000001.11",
199
- "start": 154192135,
200
- "end": None,
201
- "exon_start": 1,
202
- "exon_end": None,
203
- "exon_start_offset": 0,
204
- "exon_end_offset": None,
205
- "transcript": "NM_152263.3",
206
- "strand": Strand.NEGATIVE,
207
- }
208
- }
209
- )
137
+ ncbi_gene_id: StrictInt
138
+ hgnc_id: StrictInt | None
139
+ symbol: StrictStr
210
140
 
211
141
 
212
142
  class ServiceMeta(BaseModelForbidExtra):
@@ -229,68 +159,3 @@ class ServiceMeta(BaseModelForbidExtra):
229
159
  }
230
160
  }
231
161
  )
232
-
233
-
234
- class TranscriptExonDataResponse(BaseModelForbidExtra):
235
- """Response model for Transcript Exon Data"""
236
-
237
- transcript_exon_data: TranscriptExonData | None = None
238
- warnings: list[StrictStr] = []
239
- service_meta: ServiceMeta
240
-
241
- model_config = ConfigDict(
242
- json_schema_extra={
243
- "example": {
244
- "transcript_exon_data": {
245
- "chr": "NC_000001.11",
246
- "gene": "TPM3",
247
- "pos": 154192135,
248
- "exon": 1,
249
- "exon_offset": 0,
250
- "transcript": "NM_152263.3",
251
- "strand": Strand.NEGATIVE,
252
- },
253
- "warnings": [],
254
- "service_meta": {
255
- "name": "cool_seq_tool",
256
- "version": __version__,
257
- "response_datetime": _now,
258
- "url": "https://github.com/GenomicMedLab/cool-seq-tool",
259
- },
260
- }
261
- }
262
- )
263
-
264
-
265
- class GenomicDataResponse(BaseModelForbidExtra):
266
- """Response model for Genomic Data"""
267
-
268
- genomic_data: GenomicData | None = None
269
- warnings: list[StrictStr] = []
270
- service_meta: ServiceMeta
271
-
272
- model_config = ConfigDict(
273
- json_schema_extra={
274
- "example": {
275
- "genomic_data": {
276
- "gene": "TPM3",
277
- "chr": "NC_000001.11",
278
- "start": 154192135,
279
- "end": None,
280
- "exon_start": 1,
281
- "exon_end": None,
282
- "exon_start_offset": 0,
283
- "exon_end_offset": None,
284
- "transcript": "NM_152263.3",
285
- "strand": Strand.NEGATIVE,
286
- },
287
- "warnings": [],
288
- "service_meta": {
289
- "name": "cool_seq_tool",
290
- "version": __version__,
291
- "response_datetime": _now,
292
- "url": "https://github.com/GenomicMedLab/cool-seq-tool",
293
- },
294
- }
295
- }
296
- )