biofiles 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
biofiles/feature.py CHANGED
@@ -1,10 +1,17 @@
1
1
  from collections import deque
2
2
  from dataclasses import dataclass, field
3
3
  from pathlib import Path
4
- from typing import Iterator, TextIO, Type, TypeVar
4
+ from typing import Iterator, TextIO, Type, TypeVar, cast
5
5
 
6
6
  from biofiles.common import Reader, Strand
7
- from biofiles.types.feature import Feature, Gene, ThreePrimeUTR, Exon, UTR, Transcript
7
+ from biofiles.types.feature import (
8
+ Feature,
9
+ Gene,
10
+ ThreePrimeUTR,
11
+ Exon,
12
+ UTR,
13
+ Transcript,
14
+ )
8
15
 
9
16
 
10
17
  @dataclass
@@ -60,6 +67,12 @@ class Features:
60
67
  self.by_id[id_] = feature
61
68
 
62
69
 
70
+ FeatureT = TypeVar("FeatureT", bound=Feature)
71
+ GeneT = TypeVar("GeneT", bound=Gene)
72
+ TranscriptT = TypeVar("TranscriptT", bound=Transcript)
73
+ UTRT = TypeVar("UTRT", bound=UTR)
74
+
75
+
63
76
  class FeatureReader(Reader):
64
77
  def __init__(
65
78
  self, input_: TextIO | Path | str, /, streaming_window: int | None = 1000
@@ -107,10 +120,10 @@ class FeatureReader(Reader):
107
120
 
108
121
  def _finalize_draft(self, draft: FeatureDraft, result: Features) -> Feature:
109
122
  match draft.type_.lower():
110
- case "gene":
111
- feature = self._finalize_gene(draft, result)
112
- case "transcript":
113
- feature = self._finalize_transcript(draft, result)
123
+ case "gene" | "ncrna_gene":
124
+ feature = self._finalize_gene(draft, result, Gene)
125
+ case "transcript" | "mrna" | "lnc_rna":
126
+ feature = self._finalize_transcript(draft, result, Transcript)
114
127
  case "exon":
115
128
  feature = self._finalize_exon(draft, result)
116
129
  case "three_prime_utr":
@@ -124,19 +137,23 @@ class FeatureReader(Reader):
124
137
  object.__setattr__(feature.parent, "children", new_children)
125
138
  return feature
126
139
 
127
- def _finalize_gene(self, draft: FeatureDraft, result: Features) -> Feature:
140
+ def _finalize_gene(
141
+ self, draft: FeatureDraft, result: Features, type_: Type[GeneT]
142
+ ) -> Feature:
128
143
  feature = self._finalize_other(draft, result)
129
144
  name = draft.pick_attribute("gene_name", "Name")
130
145
  biotype = draft.pick_attribute("gene_biotype", "biotype", "gene_type")
131
146
  if name is None or biotype is None:
132
147
  return feature
133
- return Gene(**feature.__dict__, name=name, biotype=biotype, transcripts=())
148
+ return type_(**feature.__dict__, name=name, biotype=biotype, transcripts=())
134
149
 
135
- def _finalize_transcript(self, draft: FeatureDraft, result: Features) -> Feature:
150
+ def _finalize_transcript(
151
+ self, draft: FeatureDraft, result: Features, type_: Type[TranscriptT]
152
+ ) -> Feature:
136
153
  feature = self._finalize_other(draft, result)
137
154
  if not (gene := self._find_ancestor_of_type(feature, Gene)):
138
155
  return feature
139
- transcript = Transcript(**feature.__dict__, gene=gene, exons=())
156
+ transcript = type_(**feature.__dict__, gene=gene, exons=())
140
157
  object.__setattr__(gene, "transcripts", gene.transcripts + (transcript,))
141
158
  return transcript
142
159
 
@@ -148,25 +165,21 @@ class FeatureReader(Reader):
148
165
  object.__setattr__(transcript, "exons", transcript.exons + (exon,))
149
166
  return exon
150
167
 
151
- UTRT = TypeVar("UTRT", bound=UTR)
152
-
153
168
  def _finalize_utr(
154
169
  self, draft: FeatureDraft, result: Features, type_: Type[UTRT]
155
- ) -> Feature | UTRT:
170
+ ) -> Feature:
156
171
  feature = self._finalize_other(draft, result)
157
172
  if not (transcript := self._find_ancestor_of_type(feature, Transcript)):
158
173
  return feature
159
174
  return type_(**feature.__dict__, gene=transcript.gene, transcript=transcript)
160
175
 
161
- FeatureT = TypeVar("FeatureT", bound=Feature)
162
-
163
176
  def _find_ancestor_of_type(
164
177
  self, feature: Feature, t: Type[FeatureT]
165
178
  ) -> FeatureT | None:
166
179
  ancestor = feature.parent
167
180
  while ancestor and not isinstance(ancestor, t):
168
181
  ancestor = ancestor.parent
169
- return ancestor
182
+ return cast(FeatureT | None, ancestor)
170
183
 
171
184
  def _finalize_other(self, draft: FeatureDraft, result: Features) -> Feature:
172
185
  parent_id = self._extract_parent_id(draft)
biofiles/gff.py CHANGED
@@ -148,7 +148,7 @@ if __name__ == "__main__":
148
148
  parsed_utrs = 0
149
149
  for feature in r:
150
150
  total_features += 1
151
- annotated_genes += feature.type_ == "gene"
151
+ annotated_genes += "gene" in feature.type_.lower()
152
152
  annotated_exons += feature.type_ == "exon"
153
153
  annotated_utrs += "utr" in feature.type_.lower()
154
154
  parsed_genes += isinstance(feature, Gene)
biofiles/gtf.py CHANGED
@@ -31,7 +31,7 @@ if __name__ == "__main__":
31
31
  parsed_utrs = 0
32
32
  for feature in r:
33
33
  total_features += 1
34
- annotated_genes += feature.type_ == "gene"
34
+ annotated_genes += "gene" in feature.type_.lower()
35
35
  annotated_exons += feature.type_ == "exon"
36
36
  annotated_utrs += "utr" in feature.type_.lower()
37
37
  parsed_genes += isinstance(feature, Gene)
biofiles/repeatmasker.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import sys
2
2
  from collections import Counter
3
- from typing import Iterator
3
+ from typing import Iterator, cast, Literal
4
4
 
5
5
  from biofiles.common import Reader
6
6
  from biofiles.types.repeat import Repeat
@@ -42,7 +42,7 @@ class RepeatMaskerReader(Reader):
42
42
  seq_start = int(seq_start_str)
43
43
  seq_end = int(seq_end_str)
44
44
  seq_left = int(seq_left_str[1:-1])
45
- strand = {"+": "+", "C": "-"}[strand_str]
45
+ strand = cast(Literal["+", "-"], {"+": "+", "C": "-"}[strand_str])
46
46
 
47
47
  if "/" in repeat_class_family:
48
48
  repeat_class, repeat_family = repeat_class_family.split("/", 1)
biofiles/types/feature.py CHANGED
@@ -26,7 +26,7 @@ class Feature:
26
26
  attributes: dict[str, str]
27
27
 
28
28
  id: str | None
29
- parent: "GFFFeature | None"
29
+ parent: "Feature | None"
30
30
  children: tuple["Feature", ...]
31
31
 
32
32
 
@@ -50,7 +50,6 @@ class Transcript(Feature):
50
50
  class Exon(Feature):
51
51
  gene: Gene
52
52
  transcript: Transcript
53
- # TODO mRNA
54
53
 
55
54
 
56
55
  @dataclass(frozen=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: biofiles
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Pure-Python, zero-dependency collection of bioinformatics-related file readers and writers
5
5
  Author-email: Tigran Saluev <tigran@saluev.com>
6
6
  Maintainer-email: Tigran Saluev <tigran@saluev.com>
@@ -0,0 +1,16 @@
1
+ biofiles/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ biofiles/common.py,sha256=Yi0i85FpD2wR3vqL645LTUAE6TybGDxxZQsUmEGHqu4,1126
3
+ biofiles/fasta.py,sha256=ctIt5I_fcZx-xQN921zpmlZS7e9_ICf-3_i6mTs5qbs,2135
4
+ biofiles/feature.py,sha256=tUTn16xV1e0qpgkZ1ZwQ4LJJGil5mgQJBJ9s1yFDgiI,8068
5
+ biofiles/gff.py,sha256=6xmwnuU1CsFibIHzbggYJajzQC4KGsGAfWMxyYFFChw,5798
6
+ biofiles/gtf.py,sha256=kAt_5ifb0f8cCR-kycnQhkyo78xOynqTUUGqgOP8tjA,1543
7
+ biofiles/repeatmasker.py,sha256=txOYdw15ru88pUczsk0pDFzgGpplLu23CB8Ppz-MczY,3119
8
+ biofiles/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ biofiles/types/feature.py,sha256=3Ar45WRgiaDSh5iQt24Emtk6_57G01q5nHJ1GNIJ19Y,1190
10
+ biofiles/types/repeat.py,sha256=63SqzAwEGIDIGP9pxC85RUdwXbbSm0S5WNL3lSiWlmc,641
11
+ biofiles/types/sequence.py,sha256=EOw_oKuMR0THpCYJqVE__27z7qrRqcdIPrRWTL4OFMw,152
12
+ biofiles-0.0.9.dist-info/LICENSE,sha256=CbR8ssdFyViKj25JAlMjIt1_FbiZ1tAC5t-uwUbxqak,1070
13
+ biofiles-0.0.9.dist-info/METADATA,sha256=gLu3ufoag4tZllgq9xCDZe_kA24RXuI4TqQdAI_QIKw,3033
14
+ biofiles-0.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
15
+ biofiles-0.0.9.dist-info/top_level.txt,sha256=laFaFv8hpkI4U-Pgs0yBaAJXN2_CJKl7jb-m3-tGfSc,9
16
+ biofiles-0.0.9.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- biofiles/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- biofiles/common.py,sha256=Yi0i85FpD2wR3vqL645LTUAE6TybGDxxZQsUmEGHqu4,1126
3
- biofiles/fasta.py,sha256=ctIt5I_fcZx-xQN921zpmlZS7e9_ICf-3_i6mTs5qbs,2135
4
- biofiles/feature.py,sha256=oZKNkZrCJjg4-AutGy3rri0gq-FRyo7vLwUzYG1EY7g,7809
5
- biofiles/gff.py,sha256=LIbHGkpSTo-iMeatt2opPFlpNs8tHyv9XHPIVwzh3m8,5790
6
- biofiles/gtf.py,sha256=eQsnpTjDaxrBeQ8uHzXy6C6sj8OvenFv9zwkFlytQYM,1535
7
- biofiles/repeatmasker.py,sha256=DqD1z1hUfCP4-qnfjF-oMF-ZpW_6XhOf_nzA8VHhQbw,3079
8
- biofiles/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- biofiles/types/feature.py,sha256=N6IIip7YqtSib5w_VLX1cBVwja8iWfa5AJncsKBs1PU,1209
10
- biofiles/types/repeat.py,sha256=63SqzAwEGIDIGP9pxC85RUdwXbbSm0S5WNL3lSiWlmc,641
11
- biofiles/types/sequence.py,sha256=EOw_oKuMR0THpCYJqVE__27z7qrRqcdIPrRWTL4OFMw,152
12
- biofiles-0.0.8.dist-info/LICENSE,sha256=CbR8ssdFyViKj25JAlMjIt1_FbiZ1tAC5t-uwUbxqak,1070
13
- biofiles-0.0.8.dist-info/METADATA,sha256=B0rgF4FGa2lgMehk6LdOEhHB2jddaoc76fteG3p4dp0,3033
14
- biofiles-0.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
15
- biofiles-0.0.8.dist-info/top_level.txt,sha256=laFaFv8hpkI4U-Pgs0yBaAJXN2_CJKl7jb-m3-tGfSc,9
16
- biofiles-0.0.8.dist-info/RECORD,,