biofiles 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biofiles/feature.py +29 -16
- biofiles/gff.py +1 -1
- biofiles/gtf.py +1 -1
- biofiles/repeatmasker.py +2 -2
- biofiles/types/feature.py +1 -2
- {biofiles-0.0.8.dist-info → biofiles-0.0.9.dist-info}/METADATA +1 -1
- biofiles-0.0.9.dist-info/RECORD +16 -0
- biofiles-0.0.8.dist-info/RECORD +0 -16
- {biofiles-0.0.8.dist-info → biofiles-0.0.9.dist-info}/LICENSE +0 -0
- {biofiles-0.0.8.dist-info → biofiles-0.0.9.dist-info}/WHEEL +0 -0
- {biofiles-0.0.8.dist-info → biofiles-0.0.9.dist-info}/top_level.txt +0 -0
biofiles/feature.py
CHANGED
@@ -1,10 +1,17 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from dataclasses import dataclass, field
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Iterator, TextIO, Type, TypeVar
|
4
|
+
from typing import Iterator, TextIO, Type, TypeVar, cast
|
5
5
|
|
6
6
|
from biofiles.common import Reader, Strand
|
7
|
-
from biofiles.types.feature import
|
7
|
+
from biofiles.types.feature import (
|
8
|
+
Feature,
|
9
|
+
Gene,
|
10
|
+
ThreePrimeUTR,
|
11
|
+
Exon,
|
12
|
+
UTR,
|
13
|
+
Transcript,
|
14
|
+
)
|
8
15
|
|
9
16
|
|
10
17
|
@dataclass
|
@@ -60,6 +67,12 @@ class Features:
|
|
60
67
|
self.by_id[id_] = feature
|
61
68
|
|
62
69
|
|
70
|
+
FeatureT = TypeVar("FeatureT", bound=Feature)
|
71
|
+
GeneT = TypeVar("GeneT", bound=Gene)
|
72
|
+
TranscriptT = TypeVar("TranscriptT", bound=Transcript)
|
73
|
+
UTRT = TypeVar("UTRT", bound=UTR)
|
74
|
+
|
75
|
+
|
63
76
|
class FeatureReader(Reader):
|
64
77
|
def __init__(
|
65
78
|
self, input_: TextIO | Path | str, /, streaming_window: int | None = 1000
|
@@ -107,10 +120,10 @@ class FeatureReader(Reader):
|
|
107
120
|
|
108
121
|
def _finalize_draft(self, draft: FeatureDraft, result: Features) -> Feature:
|
109
122
|
match draft.type_.lower():
|
110
|
-
case "gene":
|
111
|
-
feature = self._finalize_gene(draft, result)
|
112
|
-
case "transcript":
|
113
|
-
feature = self._finalize_transcript(draft, result)
|
123
|
+
case "gene" | "ncrna_gene":
|
124
|
+
feature = self._finalize_gene(draft, result, Gene)
|
125
|
+
case "transcript" | "mrna" | "lnc_rna":
|
126
|
+
feature = self._finalize_transcript(draft, result, Transcript)
|
114
127
|
case "exon":
|
115
128
|
feature = self._finalize_exon(draft, result)
|
116
129
|
case "three_prime_utr":
|
@@ -124,19 +137,23 @@ class FeatureReader(Reader):
|
|
124
137
|
object.__setattr__(feature.parent, "children", new_children)
|
125
138
|
return feature
|
126
139
|
|
127
|
-
def _finalize_gene(
|
140
|
+
def _finalize_gene(
|
141
|
+
self, draft: FeatureDraft, result: Features, type_: Type[GeneT]
|
142
|
+
) -> Feature:
|
128
143
|
feature = self._finalize_other(draft, result)
|
129
144
|
name = draft.pick_attribute("gene_name", "Name")
|
130
145
|
biotype = draft.pick_attribute("gene_biotype", "biotype", "gene_type")
|
131
146
|
if name is None or biotype is None:
|
132
147
|
return feature
|
133
|
-
return
|
148
|
+
return type_(**feature.__dict__, name=name, biotype=biotype, transcripts=())
|
134
149
|
|
135
|
-
def _finalize_transcript(
|
150
|
+
def _finalize_transcript(
|
151
|
+
self, draft: FeatureDraft, result: Features, type_: Type[TranscriptT]
|
152
|
+
) -> Feature:
|
136
153
|
feature = self._finalize_other(draft, result)
|
137
154
|
if not (gene := self._find_ancestor_of_type(feature, Gene)):
|
138
155
|
return feature
|
139
|
-
transcript =
|
156
|
+
transcript = type_(**feature.__dict__, gene=gene, exons=())
|
140
157
|
object.__setattr__(gene, "transcripts", gene.transcripts + (transcript,))
|
141
158
|
return transcript
|
142
159
|
|
@@ -148,25 +165,21 @@ class FeatureReader(Reader):
|
|
148
165
|
object.__setattr__(transcript, "exons", transcript.exons + (exon,))
|
149
166
|
return exon
|
150
167
|
|
151
|
-
UTRT = TypeVar("UTRT", bound=UTR)
|
152
|
-
|
153
168
|
def _finalize_utr(
|
154
169
|
self, draft: FeatureDraft, result: Features, type_: Type[UTRT]
|
155
|
-
) -> Feature
|
170
|
+
) -> Feature:
|
156
171
|
feature = self._finalize_other(draft, result)
|
157
172
|
if not (transcript := self._find_ancestor_of_type(feature, Transcript)):
|
158
173
|
return feature
|
159
174
|
return type_(**feature.__dict__, gene=transcript.gene, transcript=transcript)
|
160
175
|
|
161
|
-
FeatureT = TypeVar("FeatureT", bound=Feature)
|
162
|
-
|
163
176
|
def _find_ancestor_of_type(
|
164
177
|
self, feature: Feature, t: Type[FeatureT]
|
165
178
|
) -> FeatureT | None:
|
166
179
|
ancestor = feature.parent
|
167
180
|
while ancestor and not isinstance(ancestor, t):
|
168
181
|
ancestor = ancestor.parent
|
169
|
-
return ancestor
|
182
|
+
return cast(FeatureT | None, ancestor)
|
170
183
|
|
171
184
|
def _finalize_other(self, draft: FeatureDraft, result: Features) -> Feature:
|
172
185
|
parent_id = self._extract_parent_id(draft)
|
biofiles/gff.py
CHANGED
@@ -148,7 +148,7 @@ if __name__ == "__main__":
|
|
148
148
|
parsed_utrs = 0
|
149
149
|
for feature in r:
|
150
150
|
total_features += 1
|
151
|
-
annotated_genes += feature.type_ == "gene"
|
151
|
+
annotated_genes += "gene" in feature.type_.lower()
|
152
152
|
annotated_exons += feature.type_ == "exon"
|
153
153
|
annotated_utrs += "utr" in feature.type_.lower()
|
154
154
|
parsed_genes += isinstance(feature, Gene)
|
biofiles/gtf.py
CHANGED
@@ -31,7 +31,7 @@ if __name__ == "__main__":
|
|
31
31
|
parsed_utrs = 0
|
32
32
|
for feature in r:
|
33
33
|
total_features += 1
|
34
|
-
annotated_genes += feature.type_ == "gene"
|
34
|
+
annotated_genes += "gene" in feature.type_.lower()
|
35
35
|
annotated_exons += feature.type_ == "exon"
|
36
36
|
annotated_utrs += "utr" in feature.type_.lower()
|
37
37
|
parsed_genes += isinstance(feature, Gene)
|
biofiles/repeatmasker.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import sys
|
2
2
|
from collections import Counter
|
3
|
-
from typing import Iterator
|
3
|
+
from typing import Iterator, cast, Literal
|
4
4
|
|
5
5
|
from biofiles.common import Reader
|
6
6
|
from biofiles.types.repeat import Repeat
|
@@ -42,7 +42,7 @@ class RepeatMaskerReader(Reader):
|
|
42
42
|
seq_start = int(seq_start_str)
|
43
43
|
seq_end = int(seq_end_str)
|
44
44
|
seq_left = int(seq_left_str[1:-1])
|
45
|
-
strand = {"+": "+", "C": "-"}[strand_str]
|
45
|
+
strand = cast(Literal["+", "-"], {"+": "+", "C": "-"}[strand_str])
|
46
46
|
|
47
47
|
if "/" in repeat_class_family:
|
48
48
|
repeat_class, repeat_family = repeat_class_family.split("/", 1)
|
biofiles/types/feature.py
CHANGED
@@ -26,7 +26,7 @@ class Feature:
|
|
26
26
|
attributes: dict[str, str]
|
27
27
|
|
28
28
|
id: str | None
|
29
|
-
parent: "
|
29
|
+
parent: "Feature | None"
|
30
30
|
children: tuple["Feature", ...]
|
31
31
|
|
32
32
|
|
@@ -50,7 +50,6 @@ class Transcript(Feature):
|
|
50
50
|
class Exon(Feature):
|
51
51
|
gene: Gene
|
52
52
|
transcript: Transcript
|
53
|
-
# TODO mRNA
|
54
53
|
|
55
54
|
|
56
55
|
@dataclass(frozen=True)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: biofiles
|
3
|
-
Version: 0.0.8
|
3
|
+
Version: 0.0.9
|
4
4
|
Summary: Pure-Python, zero-dependency collection of bioinformatics-related file readers and writers
|
5
5
|
Author-email: Tigran Saluev <tigran@saluev.com>
|
6
6
|
Maintainer-email: Tigran Saluev <tigran@saluev.com>
|
@@ -0,0 +1,16 @@
|
|
1
|
+
biofiles/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
biofiles/common.py,sha256=Yi0i85FpD2wR3vqL645LTUAE6TybGDxxZQsUmEGHqu4,1126
|
3
|
+
biofiles/fasta.py,sha256=ctIt5I_fcZx-xQN921zpmlZS7e9_ICf-3_i6mTs5qbs,2135
|
4
|
+
biofiles/feature.py,sha256=tUTn16xV1e0qpgkZ1ZwQ4LJJGil5mgQJBJ9s1yFDgiI,8068
|
5
|
+
biofiles/gff.py,sha256=6xmwnuU1CsFibIHzbggYJajzQC4KGsGAfWMxyYFFChw,5798
|
6
|
+
biofiles/gtf.py,sha256=kAt_5ifb0f8cCR-kycnQhkyo78xOynqTUUGqgOP8tjA,1543
|
7
|
+
biofiles/repeatmasker.py,sha256=txOYdw15ru88pUczsk0pDFzgGpplLu23CB8Ppz-MczY,3119
|
8
|
+
biofiles/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
biofiles/types/feature.py,sha256=3Ar45WRgiaDSh5iQt24Emtk6_57G01q5nHJ1GNIJ19Y,1190
|
10
|
+
biofiles/types/repeat.py,sha256=63SqzAwEGIDIGP9pxC85RUdwXbbSm0S5WNL3lSiWlmc,641
|
11
|
+
biofiles/types/sequence.py,sha256=EOw_oKuMR0THpCYJqVE__27z7qrRqcdIPrRWTL4OFMw,152
|
12
|
+
biofiles-0.0.9.dist-info/LICENSE,sha256=CbR8ssdFyViKj25JAlMjIt1_FbiZ1tAC5t-uwUbxqak,1070
|
13
|
+
biofiles-0.0.9.dist-info/METADATA,sha256=gLu3ufoag4tZllgq9xCDZe_kA24RXuI4TqQdAI_QIKw,3033
|
14
|
+
biofiles-0.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
15
|
+
biofiles-0.0.9.dist-info/top_level.txt,sha256=laFaFv8hpkI4U-Pgs0yBaAJXN2_CJKl7jb-m3-tGfSc,9
|
16
|
+
biofiles-0.0.9.dist-info/RECORD,,
|
biofiles-0.0.8.dist-info/RECORD
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
biofiles/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
biofiles/common.py,sha256=Yi0i85FpD2wR3vqL645LTUAE6TybGDxxZQsUmEGHqu4,1126
|
3
|
-
biofiles/fasta.py,sha256=ctIt5I_fcZx-xQN921zpmlZS7e9_ICf-3_i6mTs5qbs,2135
|
4
|
-
biofiles/feature.py,sha256=oZKNkZrCJjg4-AutGy3rri0gq-FRyo7vLwUzYG1EY7g,7809
|
5
|
-
biofiles/gff.py,sha256=LIbHGkpSTo-iMeatt2opPFlpNs8tHyv9XHPIVwzh3m8,5790
|
6
|
-
biofiles/gtf.py,sha256=eQsnpTjDaxrBeQ8uHzXy6C6sj8OvenFv9zwkFlytQYM,1535
|
7
|
-
biofiles/repeatmasker.py,sha256=DqD1z1hUfCP4-qnfjF-oMF-ZpW_6XhOf_nzA8VHhQbw,3079
|
8
|
-
biofiles/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
biofiles/types/feature.py,sha256=N6IIip7YqtSib5w_VLX1cBVwja8iWfa5AJncsKBs1PU,1209
|
10
|
-
biofiles/types/repeat.py,sha256=63SqzAwEGIDIGP9pxC85RUdwXbbSm0S5WNL3lSiWlmc,641
|
11
|
-
biofiles/types/sequence.py,sha256=EOw_oKuMR0THpCYJqVE__27z7qrRqcdIPrRWTL4OFMw,152
|
12
|
-
biofiles-0.0.8.dist-info/LICENSE,sha256=CbR8ssdFyViKj25JAlMjIt1_FbiZ1tAC5t-uwUbxqak,1070
|
13
|
-
biofiles-0.0.8.dist-info/METADATA,sha256=B0rgF4FGa2lgMehk6LdOEhHB2jddaoc76fteG3p4dp0,3033
|
14
|
-
biofiles-0.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
15
|
-
biofiles-0.0.8.dist-info/top_level.txt,sha256=laFaFv8hpkI4U-Pgs0yBaAJXN2_CJKl7jb-m3-tGfSc,9
|
16
|
-
biofiles-0.0.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|