biofiles 0.0.7__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {biofiles-0.0.7 → biofiles-0.0.8}/PKG-INFO +1 -1
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/feature.py +34 -20
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/gff.py +9 -2
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/gtf.py +9 -2
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/types/feature.py +10 -3
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles.egg-info/PKG-INFO +1 -1
- {biofiles-0.0.7 → biofiles-0.0.8}/pyproject.toml +1 -1
- {biofiles-0.0.7 → biofiles-0.0.8}/LICENSE +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/README.md +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/__init__.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/common.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/fasta.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/repeatmasker.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/types/__init__.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/types/repeat.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles/types/sequence.py +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles.egg-info/SOURCES.txt +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles.egg-info/dependency_links.txt +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/biofiles.egg-info/top_level.txt +0 -0
- {biofiles-0.0.7 → biofiles-0.0.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: biofiles
|
3
|
-
Version: 0.0.7
|
3
|
+
Version: 0.0.8
|
4
4
|
Summary: Pure-Python, zero-dependency collection of bioinformatics-related file readers and writers
|
5
5
|
Author-email: Tigran Saluev <tigran@saluev.com>
|
6
6
|
Maintainer-email: Tigran Saluev <tigran@saluev.com>
|
@@ -1,10 +1,10 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from dataclasses import dataclass, field
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Iterator, TextIO, Type
|
4
|
+
from typing import Iterator, TextIO, Type, TypeVar
|
5
5
|
|
6
6
|
from biofiles.common import Reader, Strand
|
7
|
-
from biofiles.types.feature import Feature, Gene, ThreePrimeUTR, Exon, UTR
|
7
|
+
from biofiles.types.feature import Feature, Gene, ThreePrimeUTR, Exon, UTR, Transcript
|
8
8
|
|
9
9
|
|
10
10
|
@dataclass
|
@@ -109,6 +109,8 @@ class FeatureReader(Reader):
|
|
109
109
|
match draft.type_.lower():
|
110
110
|
case "gene":
|
111
111
|
feature = self._finalize_gene(draft, result)
|
112
|
+
case "transcript":
|
113
|
+
feature = self._finalize_transcript(draft, result)
|
112
114
|
case "exon":
|
113
115
|
feature = self._finalize_exon(draft, result)
|
114
116
|
case "three_prime_utr":
|
@@ -128,33 +130,43 @@ class FeatureReader(Reader):
|
|
128
130
|
biotype = draft.pick_attribute("gene_biotype", "biotype", "gene_type")
|
129
131
|
if name is None or biotype is None:
|
130
132
|
return feature
|
131
|
-
return Gene(**feature.__dict__, name=name, biotype=biotype, exons=())
|
133
|
+
return Gene(**feature.__dict__, name=name, biotype=biotype, transcripts=())
|
132
134
|
|
133
|
-
def
|
135
|
+
def _finalize_transcript(self, draft: FeatureDraft, result: Features) -> Feature:
|
134
136
|
feature = self._finalize_other(draft, result)
|
137
|
+
if not (gene := self._find_ancestor_of_type(feature, Gene)):
|
138
|
+
return feature
|
139
|
+
transcript = Transcript(**feature.__dict__, gene=gene, exons=())
|
140
|
+
object.__setattr__(gene, "transcripts", gene.transcripts + (transcript,))
|
141
|
+
return transcript
|
135
142
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
if gene is None:
|
143
|
+
def _finalize_exon(self, draft: FeatureDraft, result: Features) -> Feature:
|
144
|
+
feature = self._finalize_other(draft, result)
|
145
|
+
if not (transcript := self._find_ancestor_of_type(feature, Transcript)):
|
141
146
|
return feature
|
142
|
-
exon = Exon(**feature.__dict__, gene=gene)
|
143
|
-
object.__setattr__(gene, "exons", gene.exons + (exon,))
|
147
|
+
exon = Exon(**feature.__dict__, gene=transcript.gene, transcript=transcript)
|
148
|
+
object.__setattr__(transcript, "exons", transcript.exons + (exon,))
|
144
149
|
return exon
|
145
150
|
|
151
|
+
UTRT = TypeVar("UTRT", bound=UTR)
|
152
|
+
|
146
153
|
def _finalize_utr(
|
147
|
-
self, draft: FeatureDraft, result: Features, type_: Type[
|
148
|
-
) -> Feature:
|
154
|
+
self, draft: FeatureDraft, result: Features, type_: Type[UTRT]
|
155
|
+
) -> Feature | UTRT:
|
149
156
|
feature = self._finalize_other(draft, result)
|
157
|
+
if not (transcript := self._find_ancestor_of_type(feature, Transcript)):
|
158
|
+
return feature
|
159
|
+
return type_(**feature.__dict__, gene=transcript.gene, transcript=transcript)
|
150
160
|
|
151
|
-
|
152
|
-
while gene and not isinstance(gene, Gene):
|
153
|
-
gene = gene.parent
|
161
|
+
FeatureT = TypeVar("FeatureT", bound=Feature)
|
154
162
|
|
155
|
-
|
156
|
-
|
157
|
-
|
163
|
+
def _find_ancestor_of_type(
|
164
|
+
self, feature: Feature, t: Type[FeatureT]
|
165
|
+
) -> FeatureT | None:
|
166
|
+
ancestor = feature.parent
|
167
|
+
while ancestor and not isinstance(ancestor, t):
|
168
|
+
ancestor = ancestor.parent
|
169
|
+
return ancestor
|
158
170
|
|
159
171
|
def _finalize_other(self, draft: FeatureDraft, result: Features) -> Feature:
|
160
172
|
parent_id = self._extract_parent_id(draft)
|
@@ -193,6 +205,8 @@ class FeatureReader(Reader):
|
|
193
205
|
return id_
|
194
206
|
if draft.type_ == "transcript" and (id_ := draft.attributes.get("gene_id")):
|
195
207
|
return id_
|
196
|
-
if draft.type_
|
208
|
+
if draft.type_ in ("exon", "UTR", "three_prime_UTR", "five_prime_UTR") and (
|
209
|
+
id_ := draft.attributes.get("transcript_id")
|
210
|
+
):
|
197
211
|
return id_
|
198
212
|
return None
|
@@ -4,7 +4,7 @@ from typing import Iterator, cast, TextIO
|
|
4
4
|
|
5
5
|
from biofiles.common import Strand, Writer
|
6
6
|
from biofiles.feature import FeatureReader, FeatureDraft, FeatureDrafts
|
7
|
-
from biofiles.types.feature import Feature, Gene, Exon
|
7
|
+
from biofiles.types.feature import Feature, Gene, Exon, UTR
|
8
8
|
|
9
9
|
__all__ = ["GFFReader", "GFF3Writer"]
|
10
10
|
|
@@ -142,14 +142,21 @@ if __name__ == "__main__":
|
|
142
142
|
total_features = 0
|
143
143
|
annotated_genes = 0
|
144
144
|
annotated_exons = 0
|
145
|
+
annotated_utrs = 0
|
145
146
|
parsed_genes = 0
|
146
147
|
parsed_exons = 0
|
148
|
+
parsed_utrs = 0
|
147
149
|
for feature in r:
|
148
150
|
total_features += 1
|
149
151
|
annotated_genes += feature.type_ == "gene"
|
150
152
|
annotated_exons += feature.type_ == "exon"
|
153
|
+
annotated_utrs += "utr" in feature.type_.lower()
|
151
154
|
parsed_genes += isinstance(feature, Gene)
|
152
155
|
parsed_exons += isinstance(feature, Exon)
|
156
|
+
parsed_utrs += isinstance(feature, UTR)
|
153
157
|
print(
|
154
|
-
f"{path}: {total_features} features,
|
158
|
+
f"{path}: {total_features} features, "
|
159
|
+
f"{parsed_genes} genes parsed out of {annotated_genes}, "
|
160
|
+
f"{parsed_exons} exons parsed out of {annotated_exons}, "
|
161
|
+
f"{parsed_utrs} UTRs parsed out of {annotated_utrs}"
|
155
162
|
)
|
@@ -4,7 +4,7 @@ import sys
|
|
4
4
|
from typing import Iterator
|
5
5
|
|
6
6
|
from biofiles.gff import GFFReader
|
7
|
-
from biofiles.types.feature import Gene, Exon, Feature
|
7
|
+
from biofiles.types.feature import Gene, Exon, Feature, UTR
|
8
8
|
|
9
9
|
|
10
10
|
class GTFReader(GFFReader):
|
@@ -25,14 +25,21 @@ if __name__ == "__main__":
|
|
25
25
|
total_features = 0
|
26
26
|
annotated_genes = 0
|
27
27
|
annotated_exons = 0
|
28
|
+
annotated_utrs = 0
|
28
29
|
parsed_genes = 0
|
29
30
|
parsed_exons = 0
|
31
|
+
parsed_utrs = 0
|
30
32
|
for feature in r:
|
31
33
|
total_features += 1
|
32
34
|
annotated_genes += feature.type_ == "gene"
|
33
35
|
annotated_exons += feature.type_ == "exon"
|
36
|
+
annotated_utrs += "utr" in feature.type_.lower()
|
34
37
|
parsed_genes += isinstance(feature, Gene)
|
35
38
|
parsed_exons += isinstance(feature, Exon)
|
39
|
+
parsed_utrs += isinstance(feature, UTR)
|
36
40
|
print(
|
37
|
-
f"{path}: {total_features} features,
|
41
|
+
f"{path}: {total_features} features, "
|
42
|
+
f"{parsed_genes} genes parsed out of {annotated_genes}, "
|
43
|
+
f"{parsed_exons} exons parsed out of {annotated_exons}, "
|
44
|
+
f"{parsed_utrs} UTRs parsed out of {annotated_utrs}"
|
38
45
|
)
|
@@ -3,7 +3,7 @@ from dataclasses import dataclass
|
|
3
3
|
from biofiles.common import Strand
|
4
4
|
|
5
5
|
|
6
|
-
__all__ = ["Feature", "Gene", "Exon"]
|
6
|
+
__all__ = ["Feature", "Gene", "Transcript", "Exon", "UTR", "ThreePrimeUTR"]
|
7
7
|
|
8
8
|
|
9
9
|
@dataclass(frozen=True)
|
@@ -37,19 +37,26 @@ class Feature:
|
|
37
37
|
class Gene(Feature):
|
38
38
|
name: str
|
39
39
|
biotype: str
|
40
|
+
transcripts: tuple["Transcript", ...]
|
41
|
+
|
42
|
+
|
43
|
+
@dataclass(frozen=True)
|
44
|
+
class Transcript(Feature):
|
45
|
+
gene: Gene
|
40
46
|
exons: tuple["Exon", ...]
|
41
47
|
|
42
48
|
|
43
49
|
@dataclass(frozen=True)
|
44
50
|
class Exon(Feature):
|
45
51
|
gene: Gene
|
46
|
-
|
52
|
+
transcript: Transcript
|
53
|
+
# TODO mRNA
|
47
54
|
|
48
55
|
|
49
56
|
@dataclass(frozen=True)
|
50
57
|
class UTR(Feature):
|
51
58
|
gene: Gene
|
52
|
-
|
59
|
+
transcript: Transcript
|
53
60
|
|
54
61
|
|
55
62
|
@dataclass(frozen=True)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: biofiles
|
3
|
-
Version: 0.0.7
|
3
|
+
Version: 0.0.8
|
4
4
|
Summary: Pure-Python, zero-dependency collection of bioinformatics-related file readers and writers
|
5
5
|
Author-email: Tigran Saluev <tigran@saluev.com>
|
6
6
|
Maintainer-email: Tigran Saluev <tigran@saluev.com>
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|