biofiles 0.0.14__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biofiles/dialects/detector.py +74 -0
- biofiles/dialects/gencode.py +235 -0
- biofiles/dialects/genomic_base.py +25 -0
- biofiles/dialects/refseq.py +142 -0
- biofiles/dialects/stringtie.py +24 -0
- biofiles/gff.py +49 -44
- biofiles/gtf.py +34 -11
- biofiles/types/feature.py +268 -34
- biofiles/utility/cli.py +2 -1
- biofiles/utility/feature.py +180 -211
- {biofiles-0.0.14.dist-info → biofiles-0.1.1.dist-info}/METADATA +9 -4
- biofiles-0.1.1.dist-info/RECORD +27 -0
- biofiles/dialects/havana_ensembl.py +0 -101
- biofiles/types/feature_v2.py +0 -105
- biofiles/utility/feature_v2.py +0 -148
- biofiles-0.0.14.dist-info/RECORD +0 -25
- {biofiles-0.0.14.dist-info → biofiles-0.1.1.dist-info}/WHEEL +0 -0
- {biofiles-0.0.14.dist-info → biofiles-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {biofiles-0.0.14.dist-info → biofiles-0.1.1.dist-info}/top_level.txt +0 -0
biofiles/types/feature_v2.py
DELETED
@@ -1,105 +0,0 @@
|
|
1
|
-
from dataclasses import dataclass, Field, field as dataclass_field
|
2
|
-
from typing import dataclass_transform
|
3
|
-
|
4
|
-
from biofiles.common import Strand
|
5
|
-
|
6
|
-
|
7
|
-
@dataclass
class Relation:
    """Forward half of a link between two feature classes.

    Instances are created by ``relation()`` and attached to a dataclass field
    through its ``metadata["relation"]`` entry.
    """

    # Name of the attribute whose value is the ID of the related feature.
    id_field_name: str
    # Opposite half of the link; ``relation()`` fills it in right after creation.
    inverse: "InverseRelation | None" = None
    # Class that declares the forward field; assigned by ``FeatureMetaclass``.
    class_: type | None = None
|
12
|
-
|
13
|
-
|
14
|
-
@dataclass
class InverseRelation:
    """Reverse half of a link created by ``relation()``."""

    # The forward relation this object mirrors.
    inverse: Relation
    # Flag passed through from ``relation(one_to_one=...)``.
    one_to_one: bool
    # Class associated with this side of the link; assigned by ``FeatureMetaclass``.
    class_: type | None = None
|
19
|
-
|
20
|
-
|
21
|
-
@dataclass_transform()
|
22
|
-
class FeatureMetaclass(type):
|
23
|
-
__id_field_name__: str
|
24
|
-
__filter_type__: str
|
25
|
-
__filter_starts__: Relation | None
|
26
|
-
__filter_ends__: Relation | None
|
27
|
-
|
28
|
-
def __new__(
|
29
|
-
cls,
|
30
|
-
name,
|
31
|
-
bases,
|
32
|
-
namespace,
|
33
|
-
type: str | None = None,
|
34
|
-
starts: Field | None = None,
|
35
|
-
ends: Field | None = None,
|
36
|
-
):
|
37
|
-
result = super().__new__(cls, name, bases, namespace)
|
38
|
-
result.__id_field_name__ = ""
|
39
|
-
for key, value in namespace.items():
|
40
|
-
match value:
|
41
|
-
case Field(metadata={"id_field_name": id_field_name}):
|
42
|
-
if result.__id_field_name__:
|
43
|
-
raise TypeError(
|
44
|
-
f"should specify exactly one id_field() in class {result.__name__}"
|
45
|
-
)
|
46
|
-
result.__id_field_name__ = id_field_name
|
47
|
-
case Field(metadata={"relation": Relation() as r}):
|
48
|
-
r.class_ = result
|
49
|
-
if key in result.__annotations__:
|
50
|
-
# TODO handle optionality and forward refs
|
51
|
-
r.inverse.class_ = result.__annotations__[key]
|
52
|
-
case Field(metadata={"relation": InverseRelation() as r}):
|
53
|
-
r.class_ = result
|
54
|
-
# TODO calculating r.inverse.class_ based on type annotation
|
55
|
-
|
56
|
-
if type is not None:
|
57
|
-
result.__filter_type__ = type
|
58
|
-
result.__filter_starts__ = None
|
59
|
-
if starts is not None:
|
60
|
-
result.__filter_starts__ = starts.metadata["relation"]
|
61
|
-
result.__filter_ends__ = None
|
62
|
-
if ends is not None:
|
63
|
-
result.__filter_ends__ = ends.metadata["relation"]
|
64
|
-
|
65
|
-
# TODO generate dataclass-like __init__ method,
|
66
|
-
# keep all relations optional
|
67
|
-
|
68
|
-
return result
|
69
|
-
|
70
|
-
|
71
|
-
class Feature(metaclass=FeatureMetaclass):
    """Base class for typed features; declares the fields common to every feature."""

    sequence_id: str
    source: str
    type_: str

    # Original coordinates as they were present in the file
    # (1-based inclusive for .gff and .gtf).
    start_original: int
    end_original: int

    # Standardized ("C-style") 0-based coordinates: start inclusive, end exclusive.
    start_c: int
    end_c: int

    score: float | None
    strand: Strand | None
    phase: int | None
    attributes: dict[str, str]
|
88
|
-
|
89
|
-
|
90
|
-
def id_field(source: str) -> Field:
    """Build a dataclass field marking *source* as the ID attribute of a feature class.

    The name is stored under the ``"id_field_name"`` metadata key, which
    ``FeatureMetaclass`` copies into ``__id_field_name__``.
    """
    metadata = {"id_field_name": source}
    return dataclass_field(metadata=metadata)
|
92
|
-
|
93
|
-
|
94
|
-
def field(source: str) -> Field:
    """Build a dataclass field tagged with *source* under the ``"field_name"`` metadata key."""
    metadata = {"field_name": source}
    return dataclass_field(metadata=metadata)
|
96
|
-
|
97
|
-
|
98
|
-
def relation(source: str, *, one_to_one: bool = False) -> tuple[Field, Field]:
    """Create the two linked dataclass fields that describe a relation.

    Args:
        source: attribute name holding the ID of the related feature.
        one_to_one: flag stored on the inverse side of the relation.

    Returns:
        ``(forward_field, inverse_field)``; each carries its half of the
        relation under the ``"relation"`` metadata key, and the two halves
        reference each other.
    """
    forward = Relation(id_field_name=source)
    forward.inverse = InverseRelation(inverse=forward, one_to_one=one_to_one)

    forward_field = dataclass_field(metadata={"relation": forward})
    inverse_field = dataclass_field(metadata={"relation": forward.inverse})
    return forward_field, inverse_field
|
biofiles/utility/feature_v2.py
DELETED
@@ -1,148 +0,0 @@
|
|
1
|
-
from collections import deque, defaultdict
|
2
|
-
from dataclasses import dataclass, field
|
3
|
-
from pathlib import Path
|
4
|
-
from typing import Any, Iterator, TextIO
|
5
|
-
|
6
|
-
from biofiles.common import Strand, Reader
|
7
|
-
from biofiles.types.feature_v2 import Feature, FeatureMetaclass, Relation
|
8
|
-
|
9
|
-
|
10
|
-
@dataclass
class FeatureDraft:
    """Mutable intermediate record for a feature whose final class may not be known yet."""

    # NOTE(review): presumably the position of the record in the input - confirm.
    idx: int
    sequence_id: str
    source: str
    type_: str
    start_original: int
    end_original: int
    score: float | None
    strand: Strand | None
    phase: int | None
    attributes: dict[str, str]

    # Feature class chosen for this draft; None until a class has been selected.
    class_: type | None = None
    # Value of the chosen class's ID attribute, taken from ``attributes``.
    id: Any = None
    # Finished Feature built from this draft, if any.
    finalized: Feature | None = None
|
26
|
-
|
27
|
-
|
28
|
-
class FeatureTypes:
    """Lookup tables from a feature type string to the class(es) filtering on it.

    Attributes:
        unique_type_mapping: type string -> the single class declared for it.
        ambiguous_type_mapping: type string -> all classes declared for it
            (two or more), ordered from most to least specific filter.
    """

    def __init__(self, feature_types: list[FeatureMetaclass]) -> None:
        """Index *feature_types*.

        Raises:
            ValueError: if any class has an empty ``__id_field_name__``.
        """
        # Validate everything first so no state is built from a bad class list.
        for candidate in feature_types:
            if candidate.__id_field_name__:
                continue
            raise ValueError(
                f"{candidate.__name__} is not proper feature type - has no id_field()"
            )

        grouped: dict[str, list[FeatureMetaclass]] = defaultdict(list)
        for candidate in feature_types:
            grouped[candidate.__filter_type__].append(candidate)

        self.ambiguous_type_mapping: dict[str, list[FeatureMetaclass]] = defaultdict(
            list
        )
        self.unique_type_mapping: dict[str, FeatureMetaclass] = {}

        for type_name, classes in grouped.items():
            if len(classes) > 1:
                self.ambiguous_type_mapping[type_name] = _sort_by_filter_specificity(
                    classes
                )
            else:
                self.unique_type_mapping[type_name] = classes[0]
|
50
|
-
|
51
|
-
|
52
|
-
def _sort_by_filter_specificity(fts: list[FeatureMetaclass]) -> list[FeatureMetaclass]:
|
53
|
-
"""Sort feature classes by their filter specificity, most specific -> least specific."""
|
54
|
-
key = lambda ft: bool(ft.__filter_starts__) + bool(ft.__filter_ends__)
|
55
|
-
return sorted(fts, key=key, reverse=True)
|
56
|
-
|
57
|
-
|
58
|
-
@dataclass
class FeatureDrafts:
    """Collection of feature drafts plus an index of the ones already classified.

    Attributes:
        feature_types: lookup tables used to pick a class for each draft.
        drafts: every draft added so far, in insertion order.
        by_class_and_id: classified drafts keyed by ``(class, id)``.
    """

    feature_types: FeatureTypes
    # The default container is a deque, so the annotation says so.
    drafts: deque[FeatureDraft] = field(default_factory=deque)
    by_class_and_id: dict[tuple[type, Any], FeatureDraft] = field(default_factory=dict)

    def add(self, draft: FeatureDraft) -> None:
        """Store *draft*; classify and index it right away if its type maps to one class.

        Drafts whose type is ambiguous or unknown are stored unclassified.

        Raises:
            KeyError: if the draft lacks the ID attribute of its class.
            ValueError: if a draft with the same class and ID is already indexed.
        """
        self.drafts.append(draft)
        if class_ := self.feature_types.unique_type_mapping.get(draft.type_):
            draft.class_ = class_
            draft.id = draft.attributes[class_.__id_field_name__]
            self.register(draft)

    def register(self, draft: FeatureDraft) -> None:
        """Index an already classified *draft* under ``(draft.class_, draft.id)``.

        Raises:
            ValueError: if that key is already taken.
        """
        if (key := (draft.class_, draft.id)) in self.by_class_and_id:
            # Take the name from the draft itself: no local ``class_`` exists
            # here, and the draft's class may still be unset.
            class_name = getattr(draft.class_, "__name__", repr(draft.class_))
            raise ValueError(f"duplicate feature ID {draft.id} for class {class_name}")
        self.by_class_and_id[key] = draft
|
77
|
-
|
78
|
-
|
79
|
-
class FeatureReader(Reader):
    """Reader that assigns user-declared feature classes to raw feature drafts.

    Iteration and draft finalization are not implemented yet; only class
    selection for drafts is available.
    """

    def __init__(
        self, input_: TextIO | Path | str, feature_types: list[FeatureMetaclass]
    ) -> None:
        """Pass *input_* to ``Reader`` and index *feature_types* by filter type."""
        super().__init__(input_)
        self._feature_types = FeatureTypes(feature_types)

    def __iter__(self) -> Iterator[Feature]:
        raise NotImplementedError

    def _finalize_drafts(self, fds: FeatureDrafts) -> Iterator[Feature]:
        # TODO: only class selection is done so far; no features are built or
        # yielded yet, so this currently returns None.
        self._choose_classes(fds)

    def _choose_classes(self, fds: FeatureDrafts) -> None:
        """Pick a class for every draft in *fds* that has none yet and index it.

        Raises:
            ValueError: if zero or more than one candidate class matches a
                draft, or if a related draft needed by a filter is missing.
        """
        for fd in fds.drafts:
            if fd.class_:
                continue

            fts = self._feature_types.ambiguous_type_mapping[fd.type_]
            # _check_filters needs the draft collection to look up related drafts.
            matching_fts = [ft for ft in fts if self._check_filters(fds, fd, ft)]
            if not matching_fts:
                raise ValueError(
                    f"no matching classes (out of {len(fts)}) for "
                    f"feature with type {fd.type_!r}, attributes {fd.attributes!r}"
                )
            if len(matching_fts) > 1:
                raise ValueError(
                    f"too many matching classes ({len(matching_fts)}) for "
                    f"feature with type {fd.type_!r}, attributes {fd.attributes!r}"
                )
            ft = matching_fts[0]
            fd.class_ = ft
            fd.id = fd.attributes[ft.__id_field_name__]
            fds.register(fd)

    def _check_filters(
        self, fds: FeatureDrafts, fd: FeatureDraft, ft: FeatureMetaclass
    ) -> bool:
        """Return True if *fd* satisfies every boundary filter declared by *ft*."""
        if r := ft.__filter_starts__:
            related_fd = self._get_related_feature_draft(fds, fd, r)
            if not self._shares_boundary(fd, related_fd, at_start=True):
                return False
        if r := ft.__filter_ends__:
            related_fd = self._get_related_feature_draft(fds, fd, r)
            if not self._shares_boundary(fd, related_fd, at_start=False):
                return False
        return True

    @staticmethod
    def _shares_boundary(
        fd: FeatureDraft, related_fd: FeatureDraft, *, at_start: bool
    ) -> bool:
        """Return True if both drafts share the strand-aware start (or end) boundary."""
        if fd.strand != related_fd.strand:
            return False
        # On "+" the strand-aware start is the start coordinate, on "-" it is
        # the end coordinate; the end boundary is the mirror image.
        if fd.strand == "+":
            compare_start = at_start
        elif fd.strand == "-":
            compare_start = not at_start
        else:
            # No usable strand: only the strand equality above is checked.
            return True
        if compare_start:
            return fd.start_original == related_fd.start_original
        return fd.end_original == related_fd.end_original

    def _get_related_feature_draft(
        self, fds: FeatureDrafts, fd: FeatureDraft, r: Relation
    ) -> FeatureDraft:
        """Return the indexed draft that *fd* points to through relation *r*.

        Raises:
            ValueError: if *fd* lacks the relation's ID attribute or no draft
                is indexed under the related class and ID.
        """
        related_class = r.inverse.class_
        try:
            related_id = fd.attributes[r.id_field_name]
            return fds.by_class_and_id[related_class, related_id]
        except KeyError as exc:
            # fd.class_ is still None while classes are being chosen, so fall
            # back to the class that declares the relation for the message.
            owner = fd.class_ or r.class_
            owner_name = getattr(owner, "__name__", repr(owner))
            related_name = getattr(related_class, "__name__", repr(related_class))
            raise ValueError(
                f"can't find related {related_name} for "
                f"{owner_name} with attributes {fd.attributes!r}"
            ) from exc
|
biofiles-0.0.14.dist-info/RECORD
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
biofiles/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
biofiles/bam.py,sha256=w32LLOAuKWdGF7joTSrB4HYXCdfvvijZW44jizG36R8,6771
|
3
|
-
biofiles/common.py,sha256=Yi0i85FpD2wR3vqL645LTUAE6TybGDxxZQsUmEGHqu4,1126
|
4
|
-
biofiles/fai.py,sha256=gG2oDmaU7PIIYYBc8LiudaeKdo-6WIdwsMDIM7qi098,678
|
5
|
-
biofiles/fasta.py,sha256=ctIt5I_fcZx-xQN921zpmlZS7e9_ICf-3_i6mTs5qbs,2135
|
6
|
-
biofiles/gff.py,sha256=b3apOmJNoiy_qQHtyUSnNh0s999B6gyAODyjI7fN15g,6246
|
7
|
-
biofiles/gtf.py,sha256=jDQmQ3LB1iNxCCYExScJ6ivQM49TrRO7IPDfUe8VK3Y,2611
|
8
|
-
biofiles/repeatmasker.py,sha256=7KObXELCHQ6oBkO8yK6Znrs6MX8sfVuxNSmOMe0Ogfk,3289
|
9
|
-
biofiles/dialects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
biofiles/dialects/havana_ensembl.py,sha256=7I97U3UUiFoSOTOR0_orw5eBjCit5FyViosRZqb6AcQ,3379
|
11
|
-
biofiles/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
biofiles/types/alignment.py,sha256=Kc0XteLyfj1gNJNLsUgzSKzAAoMobhkJyPFsovaU7dM,1696
|
13
|
-
biofiles/types/feature.py,sha256=RKul07UEV1xgWwf8W1C6O2Okb8B0nGZXDaFEezikMsc,1315
|
14
|
-
biofiles/types/feature_v2.py,sha256=ozlNyx1sKoo82970TP7a6C_OurCGYDkceg0WdTQA05c,3222
|
15
|
-
biofiles/types/repeat.py,sha256=63SqzAwEGIDIGP9pxC85RUdwXbbSm0S5WNL3lSiWlmc,641
|
16
|
-
biofiles/types/sequence.py,sha256=XeJ3wgi8AwRaVYVKmf41y5mOmWQfdsS8ysaRLZWbNoQ,254
|
17
|
-
biofiles/utility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
biofiles/utility/cli.py,sha256=bkUzmT5R4qdJ0YtA4LNU5JYpimD1HmZlHtoSaKzDsUc,4032
|
19
|
-
biofiles/utility/feature.py,sha256=O7KV3uI9JtUJWZNrOZ3XfVapwmEmRHm9li4XlpqtlMs,8865
|
20
|
-
biofiles/utility/feature_v2.py,sha256=ByhUCkG45d_wra8W5vz-8CMnNYMCHzNglFMMEhMKoWk,5440
|
21
|
-
biofiles-0.0.14.dist-info/licenses/LICENSE,sha256=CbR8ssdFyViKj25JAlMjIt1_FbiZ1tAC5t-uwUbxqak,1070
|
22
|
-
biofiles-0.0.14.dist-info/METADATA,sha256=DaOVBrpMhK_bKgBEU1uTjjUu6QJS7Zfx8mV4gYL2MLM,3056
|
23
|
-
biofiles-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
24
|
-
biofiles-0.0.14.dist-info/top_level.txt,sha256=laFaFv8hpkI4U-Pgs0yBaAJXN2_CJKl7jb-m3-tGfSc,9
|
25
|
-
biofiles-0.0.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|