biofiles 0.0.14__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,105 +0,0 @@
1
- from dataclasses import dataclass, Field, field as dataclass_field
2
- from typing import dataclass_transform
3
-
4
- from biofiles.common import Strand
5
-
6
-
7
- @dataclass
8
- class Relation:
9
- id_field_name: str
10
- inverse: "InverseRelation | None" = None
11
- class_: type | None = None
12
-
13
-
14
@dataclass
class InverseRelation:
    """Reverse side of a ``Relation``; ``relation()`` creates both as a pair."""

    # The forward Relation this object is paired with.
    inverse: Relation

    # Cardinality flag passed through from relation().
    # NOTE(review): presumably "at most one related feature" - confirm with the
    # code that consumes it; nothing in this module reads the flag.
    one_to_one: bool

    # Filled in by FeatureMetaclass: either the class declaring the inverse
    # field, or the annotated type of the forward field.
    class_: type | None = dataclass_field(default=None)
19
-
20
-
21
- @dataclass_transform()
22
- class FeatureMetaclass(type):
23
- __id_field_name__: str
24
- __filter_type__: str
25
- __filter_starts__: Relation | None
26
- __filter_ends__: Relation | None
27
-
28
- def __new__(
29
- cls,
30
- name,
31
- bases,
32
- namespace,
33
- type: str | None = None,
34
- starts: Field | None = None,
35
- ends: Field | None = None,
36
- ):
37
- result = super().__new__(cls, name, bases, namespace)
38
- result.__id_field_name__ = ""
39
- for key, value in namespace.items():
40
- match value:
41
- case Field(metadata={"id_field_name": id_field_name}):
42
- if result.__id_field_name__:
43
- raise TypeError(
44
- f"should specify exactly one id_field() in class {result.__name__}"
45
- )
46
- result.__id_field_name__ = id_field_name
47
- case Field(metadata={"relation": Relation() as r}):
48
- r.class_ = result
49
- if key in result.__annotations__:
50
- # TODO handle optionality and forward refs
51
- r.inverse.class_ = result.__annotations__[key]
52
- case Field(metadata={"relation": InverseRelation() as r}):
53
- r.class_ = result
54
- # TODO calculating r.inverse.class_ based on type annotation
55
-
56
- if type is not None:
57
- result.__filter_type__ = type
58
- result.__filter_starts__ = None
59
- if starts is not None:
60
- result.__filter_starts__ = starts.metadata["relation"]
61
- result.__filter_ends__ = None
62
- if ends is not None:
63
- result.__filter_ends__ = ends.metadata["relation"]
64
-
65
- # TODO generate dataclass-like __init__ method,
66
- # keep all relations optional
67
-
68
- return result
69
-
70
-
71
class Feature(metaclass=FeatureMetaclass):
    """Field declarations shared by feature classes built with FeatureMetaclass."""

    sequence_id: str
    source: str
    type_: str

    # Original values as they were present in the file (1-based inclusive for .gff and .gtf).
    start_original: int
    end_original: int

    # Standardized ("C-style") 0-based values, start inclusive, end exclusive.
    start_c: int
    end_c: int

    score: float | None
    strand: Strand | None
    phase: int | None
    attributes: dict[str, str]
88
-
89
-
90
def id_field(source: str) -> Field:
    """Return a dataclass field tagged as the ID field, keyed by attribute name *source*.

    FeatureMetaclass recognises the ``"id_field_name"`` metadata entry and
    stores its value as the class's ``__id_field_name__``.
    """
    metadata = {"id_field_name": source}
    return dataclass_field(metadata=metadata)
92
-
93
-
94
def field(source: str) -> Field:
    """Return a dataclass field whose metadata records *source* under ``"field_name"``."""
    metadata = {"field_name": source}
    return dataclass_field(metadata=metadata)
96
-
97
-
98
def relation(source: str, *, one_to_one: bool = False) -> tuple[Field, Field]:
    """Build a cross-linked forward/inverse relation pair wrapped in dataclass fields.

    Args:
        source: attribute name stored as the forward relation's ``id_field_name``.
        one_to_one: flag stored on the inverse relation.

    Returns:
        ``(forward_field, inverse_field)``; each field carries its relation
        object under the ``"relation"`` metadata key.
    """
    forward = Relation(id_field_name=source)
    # The two objects reference each other, so the forward side has to exist
    # before the inverse can be attached to it.
    forward.inverse = InverseRelation(inverse=forward, one_to_one=one_to_one)

    forward_field = dataclass_field(metadata={"relation": forward})
    inverse_field = dataclass_field(metadata={"relation": forward.inverse})
    return forward_field, inverse_field
@@ -1,148 +0,0 @@
1
- from collections import deque, defaultdict
2
- from dataclasses import dataclass, field
3
- from pathlib import Path
4
- from typing import Any, Iterator, TextIO
5
-
6
- from biofiles.common import Strand, Reader
7
- from biofiles.types.feature_v2 import Feature, FeatureMetaclass, Relation
8
-
9
-
10
@dataclass
class FeatureDraft:
    """Mutable intermediate record for a feature whose final class is not yet resolved."""

    # NOTE(review): presumably the record's position in the input - confirm
    # with the concrete reader that creates drafts.
    idx: int
    sequence_id: str
    source: str
    type_: str
    start_original: int
    end_original: int
    score: float | None
    strand: Strand | None
    phase: int | None
    attributes: dict[str, str]

    # Resolved feature class and ID; both are filled in by FeatureDrafts.add()
    # or FeatureReader._choose_classes() once the class is known.
    class_: type | None = field(default=None)
    id: Any = field(default=None)

    # Slot for the finished Feature (not assigned anywhere in this module).
    finalized: Feature | None = field(default=None)
26
-
27
-
28
class FeatureTypes:
    """Index of feature classes keyed by their ``__filter_type__``.

    Attributes:
        unique_type_mapping: type string -> the only class registered for it.
        ambiguous_type_mapping: type string -> every class registered for it
            (two or more), ordered from most to least specific filter.
    """

    def __init__(self, feature_types: list[FeatureMetaclass]) -> None:
        """Validate *feature_types* and split them into unique/ambiguous groups.

        Raises:
            ValueError: if any class has an empty ``__id_field_name__``.
        """
        # Validate everything up front so nothing is indexed on failure.
        for feature_type in feature_types:
            if feature_type.__id_field_name__:
                continue
            raise ValueError(
                f"{feature_type.__name__} is not proper feature type - has no id_field()"
            )

        grouped: dict[str, list[FeatureMetaclass]] = defaultdict(list)
        for feature_type in feature_types:
            grouped[feature_type.__filter_type__].append(feature_type)

        self.ambiguous_type_mapping: dict[str, list[FeatureMetaclass]] = defaultdict(
            list
        )
        self.unique_type_mapping: dict[str, FeatureMetaclass] = {}

        for type_name, candidates in grouped.items():
            if len(candidates) == 1:
                self.unique_type_mapping[type_name] = candidates[0]
            else:
                self.ambiguous_type_mapping[type_name] = _sort_by_filter_specificity(
                    candidates
                )
50
-
51
-
52
def _sort_by_filter_specificity(fts: list[FeatureMetaclass]) -> list[FeatureMetaclass]:
    """Return *fts* ordered from the most specific filter to the least specific.

    Specificity is the number of positional filters (``__filter_starts__``,
    ``__filter_ends__``) a class has set: 0, 1 or 2.
    """

    def specificity(feature_type):
        has_starts = bool(feature_type.__filter_starts__)
        has_ends = bool(feature_type.__filter_ends__)
        return has_starts + has_ends

    return sorted(fts, key=specificity, reverse=True)
56
-
57
-
58
@dataclass
class FeatureDrafts:
    """Collection of feature drafts with a lookup index by (class, ID)."""

    feature_types: FeatureTypes
    # Every draft in insertion order (a deque, matching the default factory).
    drafts: deque[FeatureDraft] = field(default_factory=deque)
    # Drafts whose class and ID are already resolved.
    by_class_and_id: dict[tuple[type, Any], FeatureDraft] = field(default_factory=dict)

    def add(self, draft: FeatureDraft) -> None:
        """Store *draft*; resolve and index it right away if its type maps to one class.

        Drafts whose type is ambiguous (or unknown) are stored only and left
        for later class resolution.

        Raises:
            KeyError: if the draft lacks the attribute named by the class's ID field.
            ValueError: if another draft already has the same class and ID.
        """
        self.drafts.append(draft)
        if class_ := self.feature_types.unique_type_mapping.get(draft.type_):
            draft.class_ = class_
            draft.id = draft.attributes[class_.__id_field_name__]
            self.register(draft)

    def register(self, draft: FeatureDraft) -> None:
        """Index *draft* by ``(draft.class_, draft.id)``; both must already be set.

        Raises:
            ValueError: if that key is already taken.
        """
        key = (draft.class_, draft.id)
        if key in self.by_class_and_id:
            # Use draft.class_ here: no local ``class_`` exists in this method.
            raise ValueError(
                f"duplicate feature ID {draft.id} for class {draft.class_.__name__}"
            )
        self.by_class_and_id[key] = draft
77
-
78
-
79
class FeatureReader(Reader):
    """Base reader that resolves feature drafts into user-declared feature classes.

    Iteration is not implemented here; ``__iter__`` raises NotImplementedError.
    """

    def __init__(
        self, input_: TextIO | Path | str, feature_types: list[FeatureMetaclass]
    ) -> None:
        """Open *input_* via ``Reader`` and index *feature_types*.

        Raises:
            ValueError: if a feature type declares no ``id_field()``.
        """
        super().__init__(input_)
        self._feature_types = FeatureTypes(feature_types)

    def __iter__(self) -> Iterator[Feature]:
        raise NotImplementedError

    def _finalize_drafts(self, fds: FeatureDrafts) -> Iterator[Feature]:
        """Resolve the class of every draft in *fds*.

        NOTE(review): unfinished - despite the annotation, this currently
        only assigns classes and returns None; no Feature objects are built.
        """
        self._choose_classes(fds)

    def _choose_classes(self, fds: FeatureDrafts) -> None:
        """Assign a class and ID to each draft that does not have one yet.

        Raises:
            ValueError: if zero or more than one candidate class passes the
                filters, or if the resolved (class, ID) pair is a duplicate.
        """
        for fd in fds.drafts:
            if fd.class_:
                continue

            # .get() avoids inserting empty entries into the defaultdict for
            # types that no class was registered for.
            fts = self._feature_types.ambiguous_type_mapping.get(fd.type_, [])
            matching_fts = [ft for ft in fts if self._check_filters(fds, fd, ft)]
            if not matching_fts:
                raise ValueError(
                    f"no matching classes (out of {len(fts)}) for "
                    f"feature with type {fd.type_!r}, attributes {fd.attributes!r}"
                )
            if len(matching_fts) > 1:
                raise ValueError(
                    f"too many matching classes ({len(matching_fts)}) for "
                    f"feature with type {fd.type_!r}, attributes {fd.attributes!r}"
                )
            ft = matching_fts[0]
            fd.class_ = ft
            fd.id = fd.attributes[ft.__id_field_name__]
            fds.register(fd)

    def _check_filters(
        self, fds: FeatureDrafts, fd: FeatureDraft, ft: FeatureMetaclass
    ) -> bool:
        """Return True if draft *fd* satisfies the positional filters of class *ft*.

        A "starts" filter requires *fd* to share its strand and its
        strand-aware first coordinate with the related draft; an "ends"
        filter requires the same for the last coordinate.  On the "-" strand
        the roles of ``start_original`` and ``end_original`` are swapped.

        Raises:
            ValueError: if the related draft cannot be found.
        """
        if r := ft.__filter_starts__:
            related_fd = self._get_related_feature_draft(fds, fd, r)
            if fd.strand != related_fd.strand:
                return False
            if fd.strand == "+" and fd.start_original != related_fd.start_original:
                return False
            if fd.strand == "-" and fd.end_original != related_fd.end_original:
                return False
        if r := ft.__filter_ends__:
            related_fd = self._get_related_feature_draft(fds, fd, r)
            if fd.strand != related_fd.strand:
                return False
            if fd.strand == "+" and fd.end_original != related_fd.end_original:
                return False
            if fd.strand == "-" and fd.start_original != related_fd.start_original:
                return False
        return True

    def _get_related_feature_draft(
        self, fds: FeatureDrafts, fd: FeatureDraft, r: Relation
    ) -> FeatureDraft:
        """Look up the already-registered draft that *fd* points to through *r*.

        Raises:
            KeyError: if *fd* lacks the attribute named by ``r.id_field_name``.
            ValueError: if no registered draft has the related class and ID.
        """
        related_class = r.inverse.class_
        related_id = fd.attributes[r.id_field_name]
        try:
            return fds.by_class_and_id[related_class, related_id]
        except KeyError as exc:
            # This runs while fd's class is still being chosen, so fd.class_
            # is usually None; fall back to the raw type string rather than
            # crash while building the message.
            owner = fd.class_.__name__ if fd.class_ else fd.type_
            related_name = getattr(related_class, "__name__", related_class)
            raise ValueError(
                f"can't find related {related_name} for "
                f"{owner} with attributes {fd.attributes!r}"
            ) from exc
@@ -1,25 +0,0 @@
1
- biofiles/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- biofiles/bam.py,sha256=w32LLOAuKWdGF7joTSrB4HYXCdfvvijZW44jizG36R8,6771
3
- biofiles/common.py,sha256=Yi0i85FpD2wR3vqL645LTUAE6TybGDxxZQsUmEGHqu4,1126
4
- biofiles/fai.py,sha256=gG2oDmaU7PIIYYBc8LiudaeKdo-6WIdwsMDIM7qi098,678
5
- biofiles/fasta.py,sha256=ctIt5I_fcZx-xQN921zpmlZS7e9_ICf-3_i6mTs5qbs,2135
6
- biofiles/gff.py,sha256=b3apOmJNoiy_qQHtyUSnNh0s999B6gyAODyjI7fN15g,6246
7
- biofiles/gtf.py,sha256=jDQmQ3LB1iNxCCYExScJ6ivQM49TrRO7IPDfUe8VK3Y,2611
8
- biofiles/repeatmasker.py,sha256=7KObXELCHQ6oBkO8yK6Znrs6MX8sfVuxNSmOMe0Ogfk,3289
9
- biofiles/dialects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- biofiles/dialects/havana_ensembl.py,sha256=7I97U3UUiFoSOTOR0_orw5eBjCit5FyViosRZqb6AcQ,3379
11
- biofiles/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- biofiles/types/alignment.py,sha256=Kc0XteLyfj1gNJNLsUgzSKzAAoMobhkJyPFsovaU7dM,1696
13
- biofiles/types/feature.py,sha256=RKul07UEV1xgWwf8W1C6O2Okb8B0nGZXDaFEezikMsc,1315
14
- biofiles/types/feature_v2.py,sha256=ozlNyx1sKoo82970TP7a6C_OurCGYDkceg0WdTQA05c,3222
15
- biofiles/types/repeat.py,sha256=63SqzAwEGIDIGP9pxC85RUdwXbbSm0S5WNL3lSiWlmc,641
16
- biofiles/types/sequence.py,sha256=XeJ3wgi8AwRaVYVKmf41y5mOmWQfdsS8ysaRLZWbNoQ,254
17
- biofiles/utility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- biofiles/utility/cli.py,sha256=bkUzmT5R4qdJ0YtA4LNU5JYpimD1HmZlHtoSaKzDsUc,4032
19
- biofiles/utility/feature.py,sha256=O7KV3uI9JtUJWZNrOZ3XfVapwmEmRHm9li4XlpqtlMs,8865
20
- biofiles/utility/feature_v2.py,sha256=ByhUCkG45d_wra8W5vz-8CMnNYMCHzNglFMMEhMKoWk,5440
21
- biofiles-0.0.14.dist-info/licenses/LICENSE,sha256=CbR8ssdFyViKj25JAlMjIt1_FbiZ1tAC5t-uwUbxqak,1070
22
- biofiles-0.0.14.dist-info/METADATA,sha256=DaOVBrpMhK_bKgBEU1uTjjUu6QJS7Zfx8mV4gYL2MLM,3056
23
- biofiles-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- biofiles-0.0.14.dist-info/top_level.txt,sha256=laFaFv8hpkI4U-Pgs0yBaAJXN2_CJKl7jb-m3-tGfSc,9
25
- biofiles-0.0.14.dist-info/RECORD,,